linux/arch/x86/coco/sev/core.c

// SPDX-License-Identifier: GPL-2.0-only
/*
* AMD Memory Encryption Support
*
* Copyright (C) 2019 SUSE
*
* Author: Joerg Roedel <jroedel@suse.de>
*/
#define pr_fmt(fmt) "SEV: " fmt
#include <linux/sched/debug.h> /* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <crypto/gcm.h>
#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/sev-internal.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
#include <asm/msr.h>
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT 0xffff
#define AP_INIT_DS_LIMIT 0xffff
#define AP_INIT_LDTR_LIMIT 0xffff
#define AP_INIT_GDTR_LIMIT 0xffff
#define AP_INIT_IDTR_LIMIT 0xffff
#define AP_INIT_TR_LIMIT 0xffff
#define AP_INIT_RFLAGS_DEFAULT 0x2
#define AP_INIT_DR6_DEFAULT 0xffff0ff0
#define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT 0x1
#define AP_INIT_X87_FTW_DEFAULT 0x5555
#define AP_INIT_X87_FCW_DEFAULT 0x0040
#define AP_INIT_CR0_DEFAULT 0x60000010
#define AP_INIT_MXCSR_DEFAULT 0x1f80
static const char * const sev_status_feat_names[] = {
[MSR_AMD64_SEV_ENABLED_BIT] = "SEV",
[MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES",
[MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP",
[MSR_AMD64_SNP_VTOM_BIT] = "vTom",
[MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC",
[MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI",
[MSR_AMD64_SNP_ALT_INJ_BIT] = "AI",
[MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap",
[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS",
[MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol",
[MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS",
[MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC",
[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam",
[MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt",
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
};
/*
* For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
* initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
* across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
*/
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static unsigned long snp_tsc_freq_khz __ro_after_init;
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
/*
* SVSM related information:
* When running under an SVSM, the VMPL that Linux is executing at must be
* non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
*/
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
static u64 __init get_snp_jump_table_addr(void)
{
struct snp_secrets_page *secrets;
void __iomem *mem;
u64 addr;
mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
if (!mem) {
pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
return 0;
}
secrets = (__force struct snp_secrets_page *)mem;
addr = secrets->os_area.ap_jump_table_pa;
iounmap(mem);
return addr;
}
static u64 __init get_jump_table_addr(void)
{
struct ghcb_state state;
unsigned long flags;
struct ghcb *ghcb;
u64 ret = 0;
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return get_snp_jump_table_addr();
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state);
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
ghcb_set_sw_exit_info_2(ghcb, 0);
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
ghcb_sw_exit_info_2_is_valid(ghcb))
ret = ghcb->save.sw_exit_info_2;
__sev_put_ghcb(&state);
local_irq_restore(flags);
return ret;
}
static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
int ret, u64 svsm_ret)
{
WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
pfn, action, page_size, ret, svsm_ret);
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
}
static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
{
unsigned int page_size;
bool action;
u64 pfn;
pfn = pc->entry[pc->cur_index].pfn;
action = pc->entry[pc->cur_index].action;
page_size = pc->entry[pc->cur_index].page_size;
__pval_terminate(pfn, action, page_size, ret, svsm_ret);
}
static void pval_pages(struct snp_psc_desc *desc)
{
struct psc_entry *e;
unsigned long vaddr;
unsigned int size;
unsigned int i;
bool validate;
u64 pfn;
int rc;
for (i = 0; i <= desc->hdr.end_entry; i++) {
e = &desc->entries[i];
pfn = e->gfn;
vaddr = (unsigned long)pfn_to_kaddr(pfn);
size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
validate = e->operation == SNP_PAGE_STATE_PRIVATE;
rc = pvalidate(vaddr, size, validate);
if (!rc)
continue;
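/*
* A 2M PVALIDATE can fail with FAIL_SIZEMISMATCH if the region is
* backed by 4K pages in the RMP. In that case, fall back to
* validating each 4K page of the 2M range individually.
*/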
if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
unsigned long vaddr_end = vaddr + PMD_SIZE;
for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
if (rc)
__pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
}
} else {
__pval_terminate(pfn, validate, size, rc, 0);
}
}
}
static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
struct svsm_pvalidate_call *pc)
{
struct svsm_pvalidate_entry *pe;
/* Nothing in the CA yet */
pc->num_entries = 0;
pc->cur_index = 0;
pe = &pc->entry[0];
while (pfn < pfn_end) {
pe->page_size = RMP_PG_SIZE_4K;
pe->action = action;
pe->ignore_cf = 0;
pe->pfn = pfn;
pe++;
pfn++;
pc->num_entries++;
if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
break;
}
return pfn;
}
static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
struct svsm_pvalidate_call *pc)
{
struct svsm_pvalidate_entry *pe;
struct psc_entry *e;
/* Nothing in the CA yet */
pc->num_entries = 0;
pc->cur_index = 0;
pe = &pc->entry[0];
e = &desc->entries[desc_entry];
while (desc_entry <= desc->hdr.end_entry) {
pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
pe->ignore_cf = 0;
pe->pfn = e->gfn;
pe++;
e++;
desc_entry++;
pc->num_entries++;
if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
break;
}
return desc_entry;
}
static void svsm_pval_pages(struct snp_psc_desc *desc)
{
struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
unsigned int i, pv_4k_count = 0;
struct svsm_pvalidate_call *pc;
struct svsm_call call = {};
unsigned long flags;
bool action;
u64 pc_pa;
int ret;
/*
* This can be called very early in the boot, use native functions in
* order to avoid paravirt issues.
*/
flags = native_local_irq_save();
/*
* The SVSM calling area (CA) can support processing 510 entries at a
* time. Loop through the Page State Change descriptor until the CA is
* full or the last entry in the descriptor is reached, at which time
* the SVSM is invoked. This repeats until all entries in the descriptor
* are processed.
*/
call.caa = svsm_get_caa();
pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
/* Protocol 0, Call ID 1 */
call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
call.rcx = pc_pa;
for (i = 0; i <= desc->hdr.end_entry;) {
i = svsm_build_ca_from_psc_desc(desc, i, pc);
do {
ret = svsm_perform_call_protocol(&call);
if (!ret)
continue;
/*
* Check if the entry failed because of an RMP mismatch (a
* PVALIDATE at 2M was requested, but the page is mapped in
* the RMP as 4K).
*/
if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
/* Save this entry for post-processing at 4K */
pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
/* Skip to the next one unless at the end of the list */
pc->cur_index++;
if (pc->cur_index < pc->num_entries)
ret = -EAGAIN;
else
ret = 0;
}
} while (ret == -EAGAIN);
if (ret)
svsm_pval_terminate(pc, ret, call.rax_out);
}
/* Process any entries that failed to be validated at 2M and validate them at 4K */
for (i = 0; i < pv_4k_count; i++) {
u64 pfn, pfn_end;
action = pv_4k[i].action;
pfn = pv_4k[i].pfn;
pfn_end = pfn + 512;
while (pfn < pfn_end) {
pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
ret = svsm_perform_call_protocol(&call);
if (ret)
svsm_pval_terminate(pc, ret, call.rax_out);
}
}
native_local_irq_restore(flags);
}
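/*
* PVALIDATE can only be executed at VMPL0. When an SVSM is present the
* kernel runs at a lesser privileged (non-zero) VMPL, so validation
* requests are proxied through the SVSM instead of issuing PVALIDATE
* directly.
*/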
static void pvalidate_pages(struct snp_psc_desc *desc)
{
if (snp_vmpl)
svsm_pval_pages(desc);
else
pval_pages(desc);
}
static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{
int cur_entry, end_entry, ret = 0;
struct snp_psc_desc *data;
struct es_em_ctxt ctxt;
vc_ghcb_invalidate(ghcb);
/* Copy the input desc into GHCB shared buffer */
data = (struct snp_psc_desc *)ghcb->shared_buffer;
memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
/*
* As per the GHCB specification, the hypervisor can resume the guest
* before processing all the entries. Check whether all the entries
* are processed. If not, then keep retrying. Note, the hypervisor
* will update the data memory directly to indicate the status, so
* reference the data->hdr everywhere.
*
* The strategy here is to wait for the hypervisor to change the page
* state in the RMP table before the guest accesses the memory pages. If the
* page state change was not successful, then later memory access will
* result in a crash.
*/
cur_entry = data->hdr.cur_entry;
end_entry = data->hdr.end_entry;
while (data->hdr.cur_entry <= data->hdr.end_entry) {
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* The hypervisor advances data->hdr.cur_entry in the shared buffer as it processes entries. */
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
* Page State Change VMGEXIT can pass error code through
* exit_info_2.
*/
if (WARN(ret || ghcb->save.sw_exit_info_2,
"SNP: PSC failed ret=%d exit_info_2=%llx\n",
ret, ghcb->save.sw_exit_info_2)) {
ret = 1;
goto out;
}
/* Verify that reserved bit is not set */
if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
ret = 1;
goto out;
}
/*
* Sanity check that entry processing is not going backwards.
* This will happen only if the hypervisor is tricking us.
*/
if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
ret = 1;
goto out;
}
}
out:
return ret;
}
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
struct ghcb_state state;
bool use_large_entry;
struct psc_hdr *hdr;
struct psc_entry *e;
unsigned long flags;
unsigned long pfn;
struct ghcb *ghcb;
int i;
hdr = &data->hdr;
e = data->entries;
memset(data, 0, sizeof(*data));
i = 0;
while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
hdr->end_entry = i;
if (is_vmalloc_addr((void *)vaddr)) {
pfn = vmalloc_to_pfn((void *)vaddr);
use_large_entry = false;
} else {
pfn = __pa(vaddr) >> PAGE_SHIFT;
use_large_entry = true;
}
e->gfn = pfn;
e->operation = op;
if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
(vaddr_end - vaddr) >= PMD_SIZE) {
e->pagesize = RMP_PG_SIZE_2M;
vaddr += PMD_SIZE;
} else {
e->pagesize = RMP_PG_SIZE_4K;
vaddr += PAGE_SIZE;
}
e++;
i++;
}
/* Page validation must be rescinded before changing to shared */
if (op == SNP_PAGE_STATE_SHARED)
pvalidate_pages(data);
local_irq_save(flags);
if (sev_cfg.ghcbs_initialized)
ghcb = __sev_get_ghcb(&state);
else
ghcb = boot_ghcb;
/* Invoke the hypervisor to perform the page state changes */
if (!ghcb || vmgexit_psc(ghcb, data))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
if (sev_cfg.ghcbs_initialized)
__sev_put_ghcb(&state);
local_irq_restore(flags);
/* Page validation must be performed after changing to private */
if (op == SNP_PAGE_STATE_PRIVATE)
pvalidate_pages(data);
return vaddr;
}
static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
struct snp_psc_desc desc;
unsigned long vaddr_end;
/* Use the MSR protocol when a GHCB is not available. */
if (!boot_ghcb)
return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
vaddr = vaddr & PAGE_MASK;
vaddr_end = vaddr + (npages << PAGE_SHIFT);
while (vaddr < vaddr_end)
vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}
void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
unsigned long vaddr, npages;
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
vaddr = (unsigned long)__va(start);
npages = (end - start) >> PAGE_SHIFT;
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
bool create = event != SVM_VMGEXIT_AP_DESTROY;
struct ghcb_state state;
unsigned long flags;
struct ghcb *ghcb;
int ret = 0;
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state);
vc_ghcb_invalidate(ghcb);
if (create)
ghcb_set_rax(ghcb, vmsa->sev_features);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
ghcb_set_sw_exit_info_1(ghcb,
((u64)apic_id << 32) |
((u64)snp_vmpl << 16) |
event);
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
lower_32_bits(ghcb->save.sw_exit_info_1)) {
pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
ret = -EINVAL;
}
__sev_put_ghcb(&state);
local_irq_restore(flags);
return ret;
}
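/*
* Set or clear the VMSA attribute of a page. Under an SVSM this is proxied
* through the Core protocol Create/Delete vCPU calls; at VMPL0 the kernel
* flips the attribute itself using RMPADJUST.
*/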
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
int ret;
if (snp_vmpl) {
struct svsm_call call = {};
unsigned long flags;
local_irq_save(flags);
call.caa = this_cpu_read(svsm_caa);
call.rcx = __pa(va);
if (make_vmsa) {
/* Protocol 0, Call ID 2 */
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
call.rdx = __pa(caa);
call.r8 = apic_id;
} else {
/* Protocol 0, Call ID 3 */
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
}
ret = svsm_perform_call_protocol(&call);
local_irq_restore(flags);
} else {
/*
* If the kernel runs at VMPL0, it can change the VMSA
* bit for a page using the RMPADJUST instruction.
* However, for the instruction to succeed it must
* target the permissions of a lesser privileged (higher
* numbered) VMPL level, so use VMPL1.
*/
u64 attrs = 1;
if (make_vmsa)
attrs |= RMPADJUST_VMSA_PAGE_BIT;
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
}
return ret;
}
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
int err;
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
if (err)
pr_err("clear VMSA page failed (%u), leaking page\n", err);
else
free_page((unsigned long)vmsa);
}
static void set_pte_enc(pte_t *kpte, int level, void *va)
{
struct pte_enc_desc d = {
.kpte = kpte,
.pte_level = level,
.va = va,
.encrypt = true
};
prepare_pte_enc(&d);
set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
}
static void unshare_all_memory(void)
{
unsigned long addr, end, size, ghcb;
struct sev_es_runtime_data *data;
unsigned int npages, level;
bool skipped_addr;
pte_t *pte;
int cpu;
/* Unshare the direct mapping. */
addr = PAGE_OFFSET;
end = PAGE_OFFSET + get_max_mapped();
while (addr < end) {
pte = lookup_address(addr, &level);
size = page_level_size(level);
npages = size / PAGE_SIZE;
skipped_addr = false;
if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
addr += size;
continue;
}
/*
* Ensure that all the per-CPU GHCBs are made private at the
* end of the unsharing loop so that the switch to the slower
* MSR protocol happens last.
*/
for_each_possible_cpu(cpu) {
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
/* Handle the case of a huge page containing the GHCB page */
if (addr <= ghcb && ghcb < addr + size) {
skipped_addr = true;
break;
}
}
if (!skipped_addr) {
set_pte_enc(pte, level, (void *)addr);
snp_set_memory_private(addr, npages);
}
addr += size;
}
/* Unshare all bss decrypted memory. */
addr = (unsigned long)__start_bss_decrypted;
end = (unsigned long)__start_bss_decrypted_unused;
npages = (end - addr) >> PAGE_SHIFT;
for (; addr < end; addr += PAGE_SIZE) {
pte = lookup_address(addr, &level);
if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
continue;
set_pte_enc(pte, level, (void *)addr);
}
addr = (unsigned long)__start_bss_decrypted;
snp_set_memory_private(addr, npages);
__flush_tlb_all();
}
/* Stop new private<->shared conversions */
void snp_kexec_begin(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
/*
* Crash kernel ends up here with interrupts disabled: can't wait for
* conversions to finish.
*
* If a race happened, just report and proceed.
*/
if (!set_memory_enc_stop_conversion())
pr_warn("Failed to stop shared<->private conversions\n");
}
/*
* Shut down all APs except the one handling kexec/kdump and clear
* the VMSA tag on the APs' VMSA pages, as they are no longer being
* used as VMSA pages.
*/
static void shutdown_all_aps(void)
{
struct sev_es_save_area *vmsa;
int apic_id, this_cpu, cpu;
this_cpu = get_cpu();
/*
* APs are already in the HLT loop when the enc_kexec_finish() callback
* is invoked.
*/
for_each_present_cpu(cpu) {
vmsa = per_cpu(sev_vmsa, cpu);
/*
* The BSP or offlined APs do not have a guest-allocated VMSA,
* so there is no need to clear the VMSA tag for this page.
*/
if (!vmsa)
continue;
/*
* Cannot clear the VMSA tag for the currently running vCPU.
*/
if (this_cpu == cpu) {
unsigned long pa;
struct page *p;
pa = __pa(vmsa);
/*
* Mark the VMSA page of the running vCPU as offline
* so that it is excluded and not touched by makedumpfile
* while generating vmcore during kdump.
*/
p = pfn_to_online_page(pa >> PAGE_SHIFT);
if (p)
__SetPageOffline(p);
continue;
}
apic_id = cpuid_to_apicid[cpu];
/*
* Issue AP destroy to ensure the AP gets kicked out of guest mode,
* allowing RMPADJUST to remove the VMSA tag from its VMSA page.
*/
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
snp_cleanup_vmsa(vmsa, apic_id);
}
put_cpu();
}
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
unsigned long size, addr;
unsigned int level, cpu;
struct ghcb *ghcb;
pte_t *pte;
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
shutdown_all_aps();
unshare_all_memory();
/*
* Switch to using the MSR protocol to change per-CPU GHCBs to
* private. All the per-CPU GHCBs have been switched back to private,
* so can't do any more GHCB calls to the hypervisor beyond this point
* until the kexec'ed kernel starts running.
*/
boot_ghcb = NULL;
sev_cfg.ghcbs_initialized = false;
for_each_possible_cpu(cpu) {
data = per_cpu(runtime_data, cpu);
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
/* Handle the case of a huge page containing the GHCB page */
addr = (unsigned long)ghcb & page_level_mask(level);
set_pte_enc(pte, level, (void *)addr);
snp_set_memory_private(addr, (size / PAGE_SIZE));
}
}
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
#define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3)
static void *snp_alloc_vmsa_page(int cpu)
{
struct page *p;
/*
* Allocate VMSA page to work around the SNP erratum where the CPU will
* incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
* collides with the RMP entry of the VMSA page. The recommended workaround
* is to not use a large page.
*
* Allocate an 8k page which is also 8k-aligned.
*/
p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
if (!p)
return NULL;
split_page(p, 1);
/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
__free_page(p);
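/*
* The remaining page sits at an odd 4K boundary within the 8K-aligned
* allocation and therefore can never itself be 2M/1G aligned.
*/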
return page_address(p + 1);
}
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
{
struct sev_es_save_area *cur_vmsa, *vmsa;
struct svsm_ca *caa;
u8 sipi_vector;
int ret;
u64 cr4;
/*
* The hypervisor SNP feature support check has happened earlier, just check
* the AP_CREATION one here.
*/
if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
return -EOPNOTSUPP;
/*
* Verify the desired start IP against the known trampoline start IP
* to catch any future new trampolines that may be introduced that
* would require a new protected guest entry point.
*/
if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
"Unsupported SNP start_ip: %lx\n", start_ip))
return -EINVAL;
/* Override start_ip with known protected guest start IP */
start_ip = real_mode_header->sev_es_trampoline_start;
cur_vmsa = per_cpu(sev_vmsa, cpu);
/*
* A new VMSA is created each time because there is no guarantee that
* the current VMSA is the kernel's or that the vCPU is not running. If
* an attempt were made to use the current VMSA with a running vCPU, a
* #VMEXIT of that vCPU would wipe out all of the settings being done
* here.
*/
vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
if (!vmsa)
return -ENOMEM;
/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
caa = per_cpu(svsm_caa, cpu);
/* CR4 should maintain the MCE value */
cr4 = native_read_cr4() & X86_CR4_MCE;
/* Set the CS value based on the start_ip converted to a SIPI vector */
sipi_vector = (start_ip >> 12);
vmsa->cs.base = sipi_vector << 12;
vmsa->cs.limit = AP_INIT_CS_LIMIT;
vmsa->cs.attrib = INIT_CS_ATTRIBS;
vmsa->cs.selector = sipi_vector << 8;
/* Set the RIP value based on start_ip */
vmsa->rip = start_ip & 0xfff;
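/*
* Illustrative example (hypothetical address): for start_ip == 0x9c1a0,
* sipi_vector == 0x9c, cs.base == 0x9c000, cs.selector == 0x9c00 and
* rip == 0x1a0, so the AP starts executing at 0x9c000 + 0x1a0 == start_ip.
*/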
/* Set AP INIT defaults as documented in the APM */
vmsa->ds.limit = AP_INIT_DS_LIMIT;
vmsa->ds.attrib = INIT_DS_ATTRIBS;
vmsa->es = vmsa->ds;
vmsa->fs = vmsa->ds;
vmsa->gs = vmsa->ds;
vmsa->ss = vmsa->ds;
vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
vmsa->tr.limit = AP_INIT_TR_LIMIT;
vmsa->tr.attrib = INIT_TR_ATTRIBS;
vmsa->cr4 = cr4;
vmsa->cr0 = AP_INIT_CR0_DEFAULT;
vmsa->dr7 = DR7_RESET_VALUE;
vmsa->dr6 = AP_INIT_DR6_DEFAULT;
vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;
/* SVME must be set. */
vmsa->efer = EFER_SVME;
/*
* Set the SNP-specific fields for this VMSA:
* VMPL level
* SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
*/
vmsa->vmpl = snp_vmpl;
vmsa->sev_features = sev_status >> 2;
/* Populate AP's TSC scale/offset to get accurate TSC values. */
if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
vmsa->tsc_scale = snp_tsc_scale;
vmsa->tsc_offset = snp_tsc_offset;
}
/* Switch the page over to a VMSA page now that it is initialized */
ret = snp_set_vmsa(vmsa, caa, apic_id, true);
if (ret) {
pr_err("set VMSA page failed (%u)\n", ret);
free_page((unsigned long)vmsa);
return -EINVAL;
}
/* Issue VMGEXIT AP Creation NAE event */
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
if (ret) {
snp_cleanup_vmsa(vmsa, apic_id);
vmsa = NULL;
}
/* Free up any previous VMSA page */
if (cur_vmsa)
snp_cleanup_vmsa(cur_vmsa, apic_id);
/* Record the current VMSA page */
per_cpu(sev_vmsa, cpu) = vmsa;
return ret;
}
void __init snp_set_wakeup_secondary_cpu(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
/*
* Always set this override if SNP is enabled. This makes it the
* required method to start APs under SNP. If the hypervisor does
* not support AP creation, then no APs will be started.
*/
apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
}
int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
u16 startup_cs, startup_ip;
phys_addr_t jump_table_pa;
u64 jump_table_addr;
u16 __iomem *jump_table;
jump_table_addr = get_jump_table_addr();
/* On UP guests there is no jump table so this is not a failure */
if (!jump_table_addr)
return 0;
/* Check if AP Jump Table is page-aligned */
if (jump_table_addr & ~PAGE_MASK)
return -EINVAL;
jump_table_pa = jump_table_addr & PAGE_MASK;
startup_cs = (u16)(rmh->trampoline_start >> 4);
startup_ip = (u16)(rmh->sev_es_trampoline_start -
rmh->trampoline_start);
jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
if (!jump_table)
return -EIO;
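/*
* The AP jump table holds a real-mode far-jump target: word 0 is the IP
* (offset of the SEV-ES trampoline within the real-mode segment) and
* word 1 is the CS segment value.
*/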
writew(startup_ip, &jump_table[0]);
writew(startup_cs, &jump_table[1]);
iounmap(jump_table);
return 0;
}
/*
* This is needed by the OVMF UEFI firmware which will use whatever it finds in
* the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
* runtime GHCBs used by the kernel are also mapped in the EFI page-table.
*
* When running under SVSM the CA page is needed too, so map it as well.
*/
int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
{
unsigned long address, pflags, pflags_enc;
struct sev_es_runtime_data *data;
int cpu;
u64 pfn;
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return 0;
pflags = _PAGE_NX | _PAGE_RW;
pflags_enc = cc_mkenc(pflags);
for_each_possible_cpu(cpu) {
data = per_cpu(runtime_data, cpu);
address = __pa(&data->ghcb_page);
pfn = address >> PAGE_SHIFT;
if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
return 1;
if (snp_vmpl) {
address = per_cpu(svsm_caa_pa, cpu);
if (!address)
return 1;
pfn = address >> PAGE_SHIFT;
if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
return 1;
}
}
return 0;
}
static void snp_register_per_cpu_ghcb(void)
{
struct sev_es_runtime_data *data;
struct ghcb *ghcb;
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
snp_register_ghcb_early(__pa(ghcb));
}
void setup_ghcb(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
/*
* Check whether the runtime #VC exception handler is active. It uses
* the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
*
* If SNP is active, register the per-CPU GHCB page so that the runtime
* exception handler can use it.
*/
if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
snp_register_per_cpu_ghcb();
sev_cfg.ghcbs_initialized = true;
return;
}
/*
* Make sure the hypervisor talks a supported protocol.
* This gets called only in the BSP boot phase.
*/
if (!sev_es_negotiate_protocol())
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
/*
* Clear the boot_ghcb. The first exception comes in before the bss
* section is cleared.
*/
memset(&boot_ghcb_page, 0, PAGE_SIZE);
/* Alright - Make the boot-ghcb public */
boot_ghcb = &boot_ghcb_page;
/* SNP guest requires that GHCB GPA must be registered. */
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
struct ghcb_state state;
struct ghcb *ghcb;
ghcb = __sev_get_ghcb(&state);
while (true) {
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
/* Wakeup signal? */
if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
ghcb->save.sw_exit_info_2)
break;
}
__sev_put_ghcb(&state);
}
/*
* Play_dead handler when running under SEV-ES. This is needed because
* the hypervisor can't deliver an SIPI request to restart the AP.
* Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
* hypervisor wakes it up again.
*/
static void sev_es_play_dead(void)
{
play_dead_common();
/* IRQs now disabled */
sev_es_ap_hlt_loop();
/*
* If we get here, the VCPU was woken up again. Jump to CPU
* startup code to get it back online.
*/
soft_restart_cpu();
}
#else /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */
#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif
static void __init alloc_runtime_data(int cpu)
{
struct sev_es_runtime_data *data;
data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
if (!data)
panic("Can't allocate SEV-ES runtime data");
per_cpu(runtime_data, cpu) = data;
if (snp_vmpl) {
struct svsm_ca *caa;
/* Allocate the SVSM CA page if an SVSM is present */
caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);
per_cpu(svsm_caa, cpu) = caa;
per_cpu(svsm_caa_pa, cpu) = __pa(caa);
}
}
static void __init init_ghcb(int cpu)
{
struct sev_es_runtime_data *data;
int err;
data = per_cpu(runtime_data, cpu);
err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
sizeof(data->ghcb_page));
if (err)
panic("Can't map GHCBs unencrypted");
memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
data->ghcb_active = false;
data->backup_ghcb_active = false;
}
void __init sev_es_init_vc_handling(void)
{
int cpu;
BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
if (!sev_es_check_cpu_features())
panic("SEV-ES CPU Features missing");
/*
* SNP is supported in v2 of the GHCB spec which mandates support for HV
* features.
*/
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
sev_hv_features = get_hv_features();
if (!(sev_hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
/* Initialize per-cpu GHCB pages */
for_each_possible_cpu(cpu) {
alloc_runtime_data(cpu);
init_ghcb(cpu);
}
/* If running under an SVSM, switch to the per-cpu CA */
if (snp_vmpl) {
struct svsm_call call = {};
unsigned long flags;
int ret;
local_irq_save(flags);
/*
* SVSM_CORE_REMAP_CA call:
* RAX = 0 (Protocol=0, CallID=0)
* RCX = New CA GPA
*/
call.caa = svsm_get_caa();
call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
call.rcx = this_cpu_read(svsm_caa_pa);
ret = svsm_perform_call_protocol(&call);
if (ret)
panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
ret, call.rax_out);
sev_cfg.use_cas = true;
local_irq_restore(flags);
}
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
/*
* SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
* enabled, as the alternative (fallback) logic for DMI probing in the legacy
* ROM region can cause a crash since this region is not pre-validated.
*/
void __init snp_dmi_setup(void)
{
if (efi_enabled(EFI_CONFIG_TABLES))
dmi_setup();
}
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
int i = 0;
pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);
for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
}
}
/*
* It is useful from an auditing/testing perspective to provide an easy way
* for the guest owner to know that the CPUID table has been initialized as
* expected, but that initialization happens too early in boot to print any
* sort of indicator, and there's not really any other good place to do it,
* so do it here.
*
* If running as an SNP guest, report the current VM privilege level (VMPL).
*/
static int __init report_snp_info(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
if (cpuid_table->count) {
pr_info("Using SNP CPUID table, %d entries present.\n",
cpuid_table->count);
if (sev_cfg.debug)
dump_cpuid_table();
}
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
pr_info("SNP running at VMPL%u.\n", snp_vmpl);
return 0;
}
arch_initcall(report_snp_info);
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
{
/* If (new) lengths have been returned, propagate them up */
if (call->rcx_out != call->rcx)
input->manifest_buf.len = call->rcx_out;
if (call->rdx_out != call->rdx)
input->certificates_buf.len = call->rdx_out;
if (call->r8_out != call->r8)
input->report_buf.len = call->r8_out;
}
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
struct svsm_attest_call *input)
{
struct svsm_attest_call *ac;
unsigned long flags;
u64 attest_call_pa;
int ret;
if (!snp_vmpl)
return -EINVAL;
local_irq_save(flags);
call->caa = svsm_get_caa();
ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
*ac = *input;
/*
* Set input registers for the request and set RDX and R8 to known
* values in order to detect length values being returned in them.
*/
call->rax = call_id;
call->rcx = attest_call_pa;
call->rdx = -1;
call->r8 = -1;
ret = svsm_perform_call_protocol(call);
update_attest_input(call, input);
local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
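/*
* Issue an SNP guest request via the GHCB. req->exitinfo2 starts out as
* SEV_RET_NO_FW_CALL and is overwritten with the result reported by the
* hypervisor/firmware: 0 on success, VMM_ERR_BUSY when the request was
* throttled (mapped to -EAGAIN so the caller can retry), or
* VMM_ERR_INVALID_LEN for an extended request whose certificate buffer is
* too small, in which case the required number of pages is returned in
* data_npages along with -ENOSPC.
*/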
static int snp_issue_guest_request(struct snp_guest_req *req)
{
struct snp_req_data *input = &req->input;
struct ghcb_state state;
struct es_em_ctxt ctxt;
unsigned long flags;
struct ghcb *ghcb;
int ret;
req->exitinfo2 = SEV_RET_NO_FW_CALL;
/*
* __sev_get_ghcb() needs to run with IRQs disabled because it is using
* a per-CPU GHCB.
*/
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state);
if (!ghcb) {
ret = -EIO;
goto e_restore_irq;
}
vc_ghcb_invalidate(ghcb);
if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
ghcb_set_rax(ghcb, input->data_gpa);
ghcb_set_rbx(ghcb, input->data_npages);
}
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
req->exitinfo2 = ghcb->save.sw_exit_info_2;
switch (req->exitinfo2) {
case 0:
break;
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
ret = -EAGAIN;
break;
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
/* Number of expected pages are returned in RBX */
if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
input->data_npages = ghcb_get_rbx(ghcb);
ret = -ENOSPC;
break;
}
fallthrough;
default:
ret = -EIO;
break;
}
e_put:
__sev_put_ghcb(&state);
e_restore_irq:
local_irq_restore(flags);
return ret;
}
/**
* snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device
*
* Check that there is an SVSM and that it supports at least TPM_SEND_COMMAND
* which is the only request used so far.
*
* Return: true if the platform provides a vTPM SVSM device, false otherwise.
*/
static bool snp_svsm_vtpm_probe(void)
{
struct svsm_call call = {};
/* The vTPM device is available only if an SVSM is present */
if (!snp_vmpl)
return false;
call.caa = svsm_get_caa();
call.rax = SVSM_VTPM_CALL(SVSM_VTPM_QUERY);
if (svsm_perform_call_protocol(&call))
return false;
/* Check that the platform command bitmap contains TPM_SEND_COMMAND (platform command 8) */
return call.rcx_out & BIT_ULL(8);
}
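/*
 * For illustration: if the SVSM returns a platform-command bitmap of 0x100
 * in rcx_out, only bit 8 (TPM_SEND_COMMAND) is advertised and the probe
 * succeeds; a bitmap of 0 means no vTPM platform commands are implemented
 * and the probe fails.
 */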
/**
* snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM
* @buffer: A buffer used to both send the command and receive the response.
*
* Execute a SVSM_VTPM_CMD call as defined by
* "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00
*
* All command request/response buffers have a common structure as specified by
* the following table:
* Byte      Size      In/Out    Description
* Offset    (Bytes)
*
* 0x000     4         In        Platform command
*                     Out       Platform command response size
*
* Each command can build upon this common request/response structure to create
* a structure specific to the command. See include/linux/tpm_svsm.h for more
* details.
*
* Return: 0 on success, -errno on failure
*/
int snp_svsm_vtpm_send_command(u8 *buffer)
{
struct svsm_call call = {};
call.caa = svsm_get_caa();
call.rax = SVSM_VTPM_CALL(SVSM_VTPM_CMD);
call.rcx = __pa(buffer);
return svsm_perform_call_protocol(&call);
}
EXPORT_SYMBOL_GPL(snp_svsm_vtpm_send_command);
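/*
 * Minimal usage sketch (illustrative only, not an in-tree caller): a
 * hypothetical user of snp_svsm_vtpm_send_command() lays out the buffer per
 * the table above -- a 4-byte platform command at offset 0x000 followed by
 * the command-specific fields from include/linux/tpm_svsm.h. The buffer is
 * handed to the SVSM by physical address, so it must come from the kernel
 * linear mapping:
 *
 *	u8 *buf = (u8 *)get_zeroed_page(GFP_KERNEL);
 *	int ret;
 *
 *	if (!buf)
 *		return -ENOMEM;
 *
 *	*(u32 *)buf = 8;	(TPM_SEND_COMMAND, platform command 8)
 *	... fill in the command-specific request fields ...
 *	ret = snp_svsm_vtpm_send_command(buf);
 *	... on success, offset 0x000 now holds the response size ...
 */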
static struct platform_device sev_guest_device = {
.name = "sev-guest",
.id = -1,
};
static struct platform_device tpm_svsm_device = {
.name = "tpm-svsm",
.id = -1,
};
static int __init snp_init_platform_device(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return -ENODEV;
if (platform_device_register(&sev_guest_device))
return -ENODEV;
if (snp_svsm_vtpm_probe() &&
platform_device_register(&tpm_svsm_device))
return -ENODEV;
pr_info("SNP guest platform devices initialized.\n");
return 0;
}
device_initcall(snp_init_platform_device);
void sev_show_status(void)
{
int i;
pr_info("Status: ");
for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
if (sev_status & BIT_ULL(i)) {
if (!sev_status_feat_names[i])
continue;
pr_cont("%s ", sev_status_feat_names[i]);
}
}
pr_cont("\n");
}
void __init snp_update_svsm_ca(void)
{
if (!snp_vmpl)
return;
/* Update the CAA to a proper kernel address */
boot_svsm_caa = &boot_svsm_ca_page;
}
#ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%d\n", snp_vmpl);
}
static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);
static struct attribute *vmpl_attrs[] = {
&vmpl_attr.attr,
NULL
};
static struct attribute_group sev_attr_group = {
.attrs = vmpl_attrs,
};
static int __init sev_sysfs_init(void)
{
struct kobject *sev_kobj;
struct device *dev_root;
int ret;
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return -ENODEV;
dev_root = bus_get_dev_root(&cpu_subsys);
if (!dev_root)
return -ENODEV;
sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
put_device(dev_root);
if (!sev_kobj)
return -ENOMEM;
ret = sysfs_create_group(sev_kobj, &sev_attr_group);
if (ret)
kobject_put(sev_kobj);
return ret;
}
arch_initcall(sev_sysfs_init);
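/*
 * Example: with the group registered above, the VMPL the kernel runs at can
 * be read from user space (path assumes the standard cpu subsystem root
 * under sysfs):
 *
 *	$ cat /sys/devices/system/cpu/sev/vmpl
 *	0
 */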
#endif // CONFIG_SYSFS
static void free_shared_pages(void *buf, size_t sz)
{
unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
int ret;
if (!buf)
return;
ret = set_memory_encrypted((unsigned long)buf, npages);
if (ret) {
WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
return;
}
__free_pages(virt_to_page(buf), get_order(sz));
}
static void *alloc_shared_pages(size_t sz)
{
unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
struct page *page;
int ret;
page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
if (!page)
return NULL;
ret = set_memory_decrypted((unsigned long)page_address(page), npages);
if (ret) {
pr_err("failed to mark page shared, ret=%d\n", ret);
__free_pages(page, get_order(sz));
return NULL;
}
return page_address(page);
}
static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
{
u8 *key = NULL;
switch (id) {
case 0:
*seqno = &secrets->os_area.msg_seqno_0;
key = secrets->vmpck0;
break;
case 1:
*seqno = &secrets->os_area.msg_seqno_1;
key = secrets->vmpck1;
break;
case 2:
*seqno = &secrets->os_area.msg_seqno_2;
key = secrets->vmpck2;
break;
case 3:
*seqno = &secrets->os_area.msg_seqno_3;
key = secrets->vmpck3;
break;
default:
break;
}
return key;
}
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
{
struct aesgcm_ctx *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return NULL;
if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
pr_err("Crypto context initialization failed\n");
kfree(ctx);
return NULL;
}
return ctx;
}
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{
/* Adjust the default VMPCK key based on the executing VMPL level */
if (vmpck_id == -1)
vmpck_id = snp_vmpl;
mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
if (!mdesc->vmpck) {
pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
return -EINVAL;
}
/* Verify that VMPCK is not zero. */
if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
pr_err("Empty VMPCK%d communication key\n", vmpck_id);
return -EINVAL;
}
mdesc->vmpck_id = vmpck_id;
mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
if (!mdesc->ctx)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL_GPL(snp_msg_init);
struct snp_msg_desc *snp_msg_alloc(void)
{
struct snp_msg_desc *mdesc;
void __iomem *mem;
BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);
mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
if (!mdesc)
return ERR_PTR(-ENOMEM);
mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
if (!mem)
goto e_free_mdesc;
mdesc->secrets = (__force struct snp_secrets_page *)mem;
/* Allocate the shared pages used for the request and response messages. */
mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
if (!mdesc->request)
goto e_unmap;
mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
if (!mdesc->response)
goto e_free_request;
return mdesc;
e_free_request:
free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
iounmap(mem);
e_free_mdesc:
kfree(mdesc);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);
void snp_msg_free(struct snp_msg_desc *mdesc)
{
if (!mdesc)
return;
kfree(mdesc->ctx);
free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
iounmap((__force void __iomem *)mdesc->secrets);
memset(mdesc, 0, sizeof(*mdesc));
kfree(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);
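/*
 * Lifecycle sketch for the message descriptor API exported above, with the
 * error unwinding trimmed; snp_get_tsc_info() below follows this pattern:
 *
 *	struct snp_msg_desc *mdesc;
 *	int rc;
 *
 *	mdesc = snp_msg_alloc();		(map secrets page, shared pages)
 *	if (IS_ERR_OR_NULL(mdesc))
 *		return -ENOMEM;
 *
 *	rc = snp_msg_init(mdesc, snp_vmpl);	(select VMPCK, set up AES-GCM)
 *	if (!rc)
 *		rc = snp_send_guest_request(mdesc, &req);
 *
 *	snp_msg_free(mdesc);			(wipes the context and buffers)
 */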
/* Mutex to serialize the shared buffer access and command handling. */
static DEFINE_MUTEX(snp_cmd_mutex);
/*
* If an error is received from the host or AMD Secure Processor (ASP) there
* are two options. Either retry the exact same encrypted request or discontinue
* using the VMPCK.
*
* This is because in the current encryption scheme GHCB v2 uses AES-GCM to
* encrypt the requests. The IV for this scheme is the sequence number. GCM
* cannot tolerate IV reuse.
*
* The ASP FW v1.51 only increments the sequence numbers on a successful
* guest<->ASP back and forth and only accepts messages at its exact sequence
* number.
*
* So if the sequence number were to be reused the encryption scheme is
* vulnerable. If the sequence number were incremented for a fresh IV the ASP
* will reject the request.
*/
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
mdesc->vmpck_id);
memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
mdesc->vmpck = NULL;
}
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
u64 count;
lockdep_assert_held(&snp_cmd_mutex);
/* Read the current message sequence counter from the secrets page */
count = *mdesc->os_area_msg_seqno;
return count + 1;
}
/* Return a non-zero sequence number on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
u64 count = __snp_get_msg_seqno(mdesc);
/*
* The message sequence counter for the SNP guest request is a 64-bit
* value, but version 2 of the GHCB specification only defines 32-bit
* storage for it. If the counter no longer fits into 32 bits, return
* zero. The caller should check the return value; if it does not and
* passes zero on, the firmware treats zero as an invalid sequence
* number and fails the message request.
*/
if (count >= UINT_MAX) {
pr_err("request message sequence counter overflow\n");
return 0;
}
return count;
}
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
/*
* The counter is also incremented by the PSP, so increment it by 2
* and save it in the secrets page.
*/
*mdesc->os_area_msg_seqno += 2;
}
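/*
 * Worked example of the bookkeeping above, assuming the counter stored in
 * the secrets page is N after the last successful exchange (it starts at
 * zero and is only ever bumped by two here, so N is even):
 *
 *	request seqno   = N + 1		(__snp_get_msg_seqno())
 *	response seqno  = N + 2		(written by the ASP, checked in
 *					 verify_and_dec_payload())
 *	stored counter  = N + 2		(snp_inc_msg_seqno())
 *
 * so the next request uses N + 3, i.e. every guest message gets a fresh,
 * odd sequence number and therefore a fresh IV.
 */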
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
struct snp_guest_msg *resp_msg = &mdesc->secret_response;
struct snp_guest_msg *req_msg = &mdesc->secret_request;
struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
struct aesgcm_ctx *ctx = mdesc->ctx;
u8 iv[GCM_AES_IV_SIZE] = {};
pr_debug("response [seqno %lld type %d version %d sz %d]\n",
resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
resp_msg_hdr->msg_sz);
/* Copy response from shared memory to encrypted memory. */
memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
/* Verify that the sequence counter is incremented by 1 */
if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
return -EBADMSG;
/* Verify response message type and version number. */
if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
return -EBADMSG;
/*
* If the message size is greater than our buffer length then return
* an error.
*/
if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
return -EBADMSG;
/* Decrypt the payload */
memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
&resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
return -EBADMSG;
return 0;
}
static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
struct snp_guest_msg *msg = &mdesc->secret_request;
struct snp_guest_msg_hdr *hdr = &msg->hdr;
struct aesgcm_ctx *ctx = mdesc->ctx;
u8 iv[GCM_AES_IV_SIZE] = {};
memset(msg, 0, sizeof(*msg));
hdr->algo = SNP_AEAD_AES_256_GCM;
hdr->hdr_version = MSG_HDR_VER;
hdr->hdr_sz = sizeof(*hdr);
hdr->msg_type = req->msg_type;
hdr->msg_version = req->msg_version;
hdr->msg_seqno = seqno;
hdr->msg_vmpck = req->vmpck_id;
hdr->msg_sz = req->req_sz;
/* Verify the sequence number is non-zero */
if (!hdr->msg_seqno)
return -ENOSR;
pr_debug("request [seqno %lld type %d version %d sz %d]\n",
hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);
if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
return -EBADMSG;
memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
AAD_LEN, iv, hdr->authtag);
return 0;
}
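/*
 * The 96-bit GCM IV built above (and in verify_and_dec_payload()) is just
 * the 64-bit message sequence number in the low eight bytes, zero padded.
 * For example, seqno 5 on x86 (little endian) yields:
 *
 *	iv[12] = { 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 *		   0x00, 0x00, 0x00, 0x00 }
 *
 * IV uniqueness is therefore exactly sequence number uniqueness, which is
 * why the error handling below is so careful never to reuse a seqno.
 */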
static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
unsigned long req_start = jiffies;
unsigned int override_npages = 0;
u64 override_err = 0;
int rc;
retry_request:
/*
* Call firmware to process the request. In this function the encrypted
* message enters shared memory with the host. So after this call the
* sequence number must be incremented or the VMPCK must be deleted to
* prevent reuse of the IV.
*/
rc = snp_issue_guest_request(req);
switch (rc) {
case -ENOSPC:
/*
* If the extended guest request fails due to having too
* small of a certificate data buffer, retry the same
* guest request without the extended data request in
* order to increment the sequence number and thus avoid
* IV reuse.
*/
override_npages = req->input.data_npages;
req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
/*
* Override the error to inform callers the given extended
* request buffer size was too small and give the caller the
* required buffer size.
*/
override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);
/*
* If this call to the firmware succeeds, the sequence number can
* be incremented allowing for continued use of the VMPCK. If
* there is an error reflected in the return value, this value
* is checked further down and the result will be the deletion
* of the VMPCK and the error code being propagated back to the
* user as an ioctl() return code.
*/
goto retry_request;
/*
* The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
* throttled. Retry in the driver to avoid returning and reusing the
* message sequence number on a different message.
*/
case -EAGAIN:
if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
rc = -ETIMEDOUT;
break;
}
schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
goto retry_request;
}
/*
* Increment the message sequence number. There is no harm in doing
* this now because decryption uses the value stored in the response
* structure and any failure will wipe the VMPCK, preventing further
* use anyway.
*/
snp_inc_msg_seqno(mdesc);
if (override_err) {
req->exitinfo2 = override_err;
/*
* If an extended guest request was issued and the supplied certificate
* buffer was not large enough, a standard guest request was issued to
* prevent IV reuse. If the standard request was successful, return -EIO
* back to the caller as would have originally been returned.
*/
if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
rc = -EIO;
}
if (override_npages)
req->input.data_npages = override_npages;
return rc;
}
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
u64 seqno;
int rc;
/*
* enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
* The offload's DMA scatter-gather list of the data to encrypt must be in the linear mapping.
*/
if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
pr_warn("AES-GSM buffers must be in linear mapping");
return -EINVAL;
}
guard(mutex)(&snp_cmd_mutex);
/* Check if the VMPCK is not empty */
if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
pr_err_ratelimited("VMPCK is disabled\n");
return -ENOTTY;
}
/* Get the message sequence number and verify that it is non-zero */
seqno = snp_get_msg_seqno(mdesc);
if (!seqno)
return -EIO;
/* Clear shared memory's response for the host to populate. */
memset(mdesc->response, 0, sizeof(struct snp_guest_msg));
/* Encrypt the userspace provided payload in mdesc->secret_request. */
rc = enc_payload(mdesc, seqno, req);
if (rc)
return rc;
/*
* Write the fully encrypted request to the shared unencrypted
* request page.
*/
memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
/* Initialize the input addresses for the guest request */
req->input.req_gpa = __pa(mdesc->request);
req->input.resp_gpa = __pa(mdesc->response);
req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
rc = __handle_guest_request(mdesc, req);
if (rc) {
if (rc == -EIO &&
req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
return rc;
pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
rc, req->exitinfo2);
snp_disable_vmpck(mdesc);
return rc;
}
rc = verify_and_dec_payload(mdesc, req);
if (rc) {
pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
snp_disable_vmpck(mdesc);
return rc;
}
return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);
static int __init snp_get_tsc_info(void)
{
struct snp_tsc_info_resp *tsc_resp;
struct snp_tsc_info_req *tsc_req;
struct snp_msg_desc *mdesc;
struct snp_guest_req req = {};
int rc = -ENOMEM;
tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
if (!tsc_req)
return rc;
/*
* The intermediate response buffer is used while decrypting the
* response payload. Make sure that it has enough space to cover
* the authtag.
*/
tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
if (!tsc_resp)
goto e_free_tsc_req;
mdesc = snp_msg_alloc();
if (IS_ERR_OR_NULL(mdesc))
goto e_free_tsc_resp;
rc = snp_msg_init(mdesc, snp_vmpl);
if (rc)
goto e_free_mdesc;
req.msg_version = MSG_HDR_VER;
req.msg_type = SNP_MSG_TSC_INFO_REQ;
req.vmpck_id = snp_vmpl;
req.req_buf = tsc_req;
req.req_sz = sizeof(*tsc_req);
req.resp_buf = (void *)tsc_resp;
req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;
rc = snp_send_guest_request(mdesc, &req);
if (rc)
goto e_request;
pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
__func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
tsc_resp->tsc_factor);
if (!tsc_resp->status) {
snp_tsc_scale = tsc_resp->tsc_scale;
snp_tsc_offset = tsc_resp->tsc_offset;
} else {
pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
rc = -EIO;
}
e_request:
/* The response buffer contains sensitive data, explicitly clear it. */
memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
snp_msg_free(mdesc);
e_free_tsc_resp:
kfree(tsc_resp);
e_free_tsc_req:
kfree(tsc_req);
return rc;
}
void __init snp_secure_tsc_prepare(void)
{
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
return;
if (snp_get_tsc_info()) {
pr_alert("Unable to retrieve Secure TSC info from ASP\n");
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
}
pr_debug("SecureTSC enabled");
}
static unsigned long securetsc_get_tsc_khz(void)
{
return snp_tsc_freq_khz;
}
void __init snp_secure_tsc_init(void)
{
struct snp_secrets_page *secrets;
unsigned long tsc_freq_mhz;
void *mem;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
return;
mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
if (!mem) {
pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
}
secrets = (__force struct snp_secrets_page *)mem;
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
/* Extract the guest TSC frequency in MHz from bits [17:0]; the rest is reserved */
tsc_freq_mhz &= GENMASK_ULL(17, 0);
snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);
x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
early_memunmap(mem, PAGE_SIZE);
}
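/*
 * Example of the calculation above: a GUEST_TSC_FREQ MSR value of 2500 in
 * bits [17:0] means a nominal 2500 MHz, i.e. 2,500,000 kHz, which
 * SNP_SCALE_TSC_FREQ() then adjusts by secrets->tsc_factor to yield the
 * mean TSC frequency actually programmed for the guest.
 */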