Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge branch 'kvm-fixes' into HEAD

* fix latent bug in how usage of large pages is determined for confidential VMs
* fix "underline too short" in docs
* eliminate log spam from limited APIC timer periods
* disallow pre-faulting of memory before SEV-SNP VMs are initialized
* delay clearing and encrypting private memory until it is added to guest page tables
* this change also enables another small cleanup: the checks in SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can now be moved in the common kvm_gmem_populate() function
commit 1773014a97

12 changed files with 210 additions and 155 deletions
		|  | @ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap). | |||
| See KVM_SET_USER_MEMORY_REGION2 for additional details. | ||||
| 
 | ||||
| 4.143 KVM_PRE_FAULT_MEMORY | ||||
| ------------------------ | ||||
| --------------------------- | ||||
| 
 | ||||
| :Capability: KVM_CAP_PRE_FAULT_MEMORY | ||||
| :Architectures: none | ||||
|  | @ -6405,6 +6405,12 @@ for the current vCPU state.  KVM maps memory as if the vCPU generated a | |||
| stage-2 read page fault, e.g. faults in memory as needed, but doesn't break | ||||
| CoW.  However, KVM does not mark any newly created stage-2 PTE as Accessed. | ||||
| 
 | ||||
| In the case of confidential VM types where there is an initial set up of | ||||
| private guest memory before the guest is 'finalized'/measured, this ioctl | ||||
| should only be issued after completing all the necessary setup to put the | ||||
| guest into a 'finalized' state so that the above semantics can be reliably | ||||
| ensured. | ||||
| 
 | ||||
| In some cases, multiple vCPUs might share the page tables.  In this | ||||
| case, the ioctl can be called in parallel. | ||||
| 
 | ||||
|  |  | |||
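
The documentation hunk above spells out when KVM_PRE_FAULT_MEMORY may be issued for confidential VMs. A minimal userspace sketch of that ordering is shown below; the struct and ioctl names come from the KVM UAPI, while the helper itself and its error handling are simplified assumptions rather than code from this series.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Hedged sketch: pre-fault a GPA range on one vCPU.  Issue this only
     * after the guest is finalized/measured; for SEV-SNP that means after
     * SNP_LAUNCH_FINISH, otherwise the kernel as changed by this merge
     * rejects the call with -EOPNOTSUPP.  Large ranges may complete
     * partially and need retrying on -EAGAIN/-EINTR.
     */
    static int prefault_range(int vcpu_fd, __u64 gpa, __u64 size)
    {
        struct kvm_pre_fault_memory range;

        memset(&range, 0, sizeof(range));
        range.gpa  = gpa;
        range.size = size;

        return ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range);
    }
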
|  | @ -1305,6 +1305,7 @@ struct kvm_arch { | |||
| 	u8 vm_type; | ||||
| 	bool has_private_mem; | ||||
| 	bool has_protected_state; | ||||
| 	bool pre_fault_allowed; | ||||
| 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | ||||
| 	struct list_head active_mmu_pages; | ||||
| 	struct list_head zapped_obsolete_pages; | ||||
|  |  | |||
|  | @ -141,8 +141,8 @@ config KVM_AMD_SEV | |||
| 	depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) | ||||
| 	select ARCH_HAS_CC_PLATFORM | ||||
| 	select KVM_GENERIC_PRIVATE_MEM | ||||
| 	select HAVE_KVM_GMEM_PREPARE | ||||
| 	select HAVE_KVM_GMEM_INVALIDATE | ||||
| 	select HAVE_KVM_ARCH_GMEM_PREPARE | ||||
| 	select HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
| 	help | ||||
| 	  Provides support for launching Encrypted VMs (SEV) and Encrypted VMs | ||||
| 	  with Encrypted State (SEV-ES) on AMD processors. | ||||
|  |  | |||
|  | @ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) | |||
| 		s64 min_period = min_timer_period_us * 1000LL; | ||||
| 
 | ||||
| 		if (apic->lapic_timer.period < min_period) { | ||||
| 			pr_info_ratelimited( | ||||
| 			pr_info_once( | ||||
| 			    "vcpu %i: requested %lld ns " | ||||
| 			    "lapic timer period limited to %lld ns\n", | ||||
| 			    apic->vcpu->vcpu_id, | ||||
|  |  | |||
|  | @ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, | |||
| 	if (req_max_level) | ||||
| 		max_level = min(max_level, req_max_level); | ||||
| 
 | ||||
| 	return req_max_level; | ||||
| 	return max_level; | ||||
| } | ||||
| 
 | ||||
| static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, | ||||
|  | @ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, | |||
| 	u64 end; | ||||
| 	int r; | ||||
| 
 | ||||
| 	if (!vcpu->kvm->arch.pre_fault_allowed) | ||||
| 		return -EOPNOTSUPP; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * reload is efficient when called repeatedly, so we can do it on | ||||
| 	 * every iteration. | ||||
|  | @ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
| 	const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); | ||||
| 
 | ||||
| 	if (level == PG_LEVEL_2M) | ||||
| 		return kvm_range_has_memory_attributes(kvm, start, end, attrs); | ||||
| 		return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); | ||||
| 
 | ||||
| 	for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { | ||||
| 		if (hugepage_test_mixed(slot, gfn, level - 1) || | ||||
|  |  | |||
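
The first MMU hunk above is the "latent bug" fix from the merge description: kvm_max_private_mapping_level() computed the minimum of the generic limit and the vendor-requested limit but then returned the vendor value, so a vendor hook returning 0 (meaning no extra limit) or a larger level could override the other constraints. A standalone sketch of the corrected pattern, with invented names for illustration only:

    /*
     * Illustration only: simplified stand-in for the logic fixed above.
     * Names are invented; the real code works with PG_LEVEL_* values and
     * per-slot large-page tracking.
     */
    static int max_private_mapping_level(int generic_max, int vendor_max)
    {
        int max_level = generic_max;

        /* 0 means the vendor module imposes no additional limit. */
        if (vendor_max)
            max_level = max_level < vendor_max ? max_level : vendor_max;

        /*
         * The old code returned vendor_max here, silently discarding the
         * generic limit; returning the combined minimum is the fix.
         */
        return max_level;
    }
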
|  | @ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf | |||
| 		bool assigned; | ||||
| 		int level; | ||||
| 
 | ||||
| 		if (!kvm_mem_is_private(kvm, gfn)) { | ||||
| 			pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", | ||||
| 				 __func__, gfn); | ||||
| 			ret = -EINVAL; | ||||
| 			goto err; | ||||
| 		} | ||||
| 
 | ||||
| 		ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); | ||||
| 		if (ret || assigned) { | ||||
| 			pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", | ||||
| 				 __func__, gfn, ret, assigned); | ||||
| 			ret = -EINVAL; | ||||
| 			ret = ret ? -EINVAL : -EEXIST; | ||||
| 			goto err; | ||||
| 		} | ||||
| 
 | ||||
|  | @ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 	data->gctx_paddr = __psp_pa(sev->snp_context); | ||||
| 	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages | ||||
| 	 * can be given to the guest simply by marking the RMP entry as private. | ||||
| 	 * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. | ||||
| 	 */ | ||||
| 	if (!ret) | ||||
| 		kvm->arch.pre_fault_allowed = true; | ||||
| 
 | ||||
| 	kfree(id_auth); | ||||
| 
 | ||||
| e_free_id_block: | ||||
|  |  | |||
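
The SEV hunks above move the "is this range private and not yet populated" checks out of SNP_LAUNCH_UPDATE and only allow pre-faulting once SNP_LAUNCH_FINISH succeeds. A rough VMM-side sketch of the resulting ordering follows; the command payloads are left empty and the helpers are hypothetical, so this shows sequencing only, not a working launch.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical wrapper around KVM_MEMORY_ENCRYPT_OP; payloads omitted. */
    static int sev_cmd(int vm_fd, int sev_fd, uint32_t id, void *data)
    {
        struct kvm_sev_cmd cmd = {
            .id = id,
            .data = (uintptr_t)data,
            .sev_fd = sev_fd,
        };

        return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
    }

    static int snp_launch_then_prefault(int vm_fd, int vcpu_fd, int sev_fd)
    {
        struct kvm_pre_fault_memory range = { .gpa = 0, .size = 0x200000 };

        /* Pre-faulting before this sequence completes fails with -EOPNOTSUPP. */
        if (sev_cmd(vm_fd, sev_fd, KVM_SEV_SNP_LAUNCH_START, NULL) ||
            sev_cmd(vm_fd, sev_fd, KVM_SEV_SNP_LAUNCH_UPDATE, NULL) ||
            sev_cmd(vm_fd, sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, NULL))
            return -1;

        /* LAUNCH_FINISH set kvm->arch.pre_fault_allowed, so this is now legal. */
        return ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range);
    }
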
|  | @ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm) | |||
| 		to_kvm_sev_info(kvm)->need_init = true; | ||||
| 
 | ||||
| 		kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); | ||||
| 		kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!pause_filter_count || !pause_filter_thresh) | ||||
|  |  | |||
|  | @ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 	kvm->arch.vm_type = type; | ||||
| 	kvm->arch.has_private_mem = | ||||
| 		(type == KVM_X86_SW_PROTECTED_VM); | ||||
| 	/* Decided by the vendor code for other VM types.  */ | ||||
| 	kvm->arch.pre_fault_allowed = | ||||
| 		type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; | ||||
| 
 | ||||
| 	ret = kvm_page_track_init(kvm); | ||||
| 	if (ret) | ||||
|  | @ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) | |||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_arch_no_poll); | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE | ||||
| bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) | ||||
| { | ||||
| 	return kvm->arch.vm_type == KVM_X86_SNP_VM; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE | ||||
| int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) | ||||
| { | ||||
| 	return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
| void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) | ||||
| { | ||||
| 	kvm_x86_call(gmem_invalidate)(start, end); | ||||
|  |  | |||
|  | @ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn | |||
| } | ||||
| 
 | ||||
| bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, | ||||
| 				     unsigned long attrs); | ||||
| 				     unsigned long mask, unsigned long attrs); | ||||
| bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, | ||||
| 					struct kvm_gfn_range *range); | ||||
| bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, | ||||
|  | @ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, | |||
| } | ||||
| #endif /* CONFIG_KVM_PRIVATE_MEM */ | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE | ||||
| int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); | ||||
| bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM | ||||
| /**
 | ||||
|  * kvm_gmem_populate() - Populate/prepare a GPA range with guest data | ||||
|  * | ||||
|  | @ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, | |||
| 
 | ||||
| long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, | ||||
| 		       kvm_gmem_populate_cb post_populate, void *opaque); | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
| void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); | ||||
| #endif | ||||
| 
 | ||||
|  |  | |||
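
For context, the kvm_gmem_populate() declaration kept under CONFIG_KVM_GENERIC_PRIVATE_MEM above pairs with a post-populate callback. The caller below is a hypothetical in-kernel user written against the signatures visible in this diff (the tail of the typedef is truncated in this rendering, so the src/order/opaque parameters are an assumption); it is not the SEV-SNP implementation.

    #include <linux/kvm_host.h>

    /*
     * Hypothetical callback: a real one (e.g. SNP_LAUNCH_UPDATE) copies
     * @src into the page at @pfn and hands the page to the guest/firmware.
     * With this merge, the common code has already checked that the range
     * is private and not yet populated before invoking the callback.
     */
    static int demo_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
                                  void __user *src, int order, void *opaque)
    {
        return 0;
    }

    static long demo_populate(struct kvm *kvm, gfn_t start_gfn, long npages)
    {
        /* NULL @src: pages are prepared without copying initial contents. */
        return kvm_gmem_populate(kvm, start_gfn, NULL, npages,
                                 demo_post_populate, NULL);
    }
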
|  | @ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM | |||
|        select KVM_PRIVATE_MEM | ||||
|        bool | ||||
| 
 | ||||
| config HAVE_KVM_GMEM_PREPARE | ||||
| config HAVE_KVM_ARCH_GMEM_PREPARE | ||||
|        bool | ||||
|        depends on KVM_PRIVATE_MEM | ||||
| 
 | ||||
| config HAVE_KVM_GMEM_INVALIDATE | ||||
| config HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
|        bool | ||||
|        depends on KVM_PRIVATE_MEM | ||||
|  |  | |||
|  | @ -13,84 +13,93 @@ struct kvm_gmem { | |||
| 	struct list_head entry; | ||||
| }; | ||||
| 
 | ||||
| static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) | ||||
| /**
 | ||||
|  * folio_file_pfn - like folio_file_page, but return a pfn. | ||||
|  * @folio: The folio which contains this index. | ||||
|  * @index: The index we want to look up. | ||||
|  * | ||||
|  * Return: The pfn for this index. | ||||
|  */ | ||||
| static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) | ||||
| { | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE | ||||
| 	struct list_head *gmem_list = &inode->i_mapping->i_private_list; | ||||
| 	struct kvm_gmem *gmem; | ||||
| 	return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); | ||||
| } | ||||
| 
 | ||||
| 	list_for_each_entry(gmem, gmem_list, entry) { | ||||
| 		struct kvm_memory_slot *slot; | ||||
| 		struct kvm *kvm = gmem->kvm; | ||||
| 		struct page *page; | ||||
| 		kvm_pfn_t pfn; | ||||
| 		gfn_t gfn; | ||||
| 		int rc; | ||||
| 
 | ||||
| 		if (!kvm_arch_gmem_prepare_needed(kvm)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		slot = xa_load(&gmem->bindings, index); | ||||
| 		if (!slot) | ||||
| 			continue; | ||||
| 
 | ||||
| 		page = folio_file_page(folio, index); | ||||
| 		pfn = page_to_pfn(page); | ||||
| 		gfn = slot->base_gfn + index - slot->gmem.pgoff; | ||||
| 		rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); | ||||
| 		if (rc) { | ||||
| 			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", | ||||
| 					    index, gfn, pfn, rc); | ||||
| 			return rc; | ||||
| 		} | ||||
| static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, | ||||
| 				    pgoff_t index, struct folio *folio) | ||||
| { | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE | ||||
| 	kvm_pfn_t pfn = folio_file_pfn(folio, index); | ||||
| 	gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; | ||||
| 	int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); | ||||
| 	if (rc) { | ||||
| 		pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", | ||||
| 				    index, gfn, pfn, rc); | ||||
| 		return rc; | ||||
| 	} | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) | ||||
| static inline void kvm_gmem_mark_prepared(struct folio *folio) | ||||
| { | ||||
| 	struct folio *folio; | ||||
| 	folio_mark_uptodate(folio); | ||||
| } | ||||
| 
 | ||||
| 	/* TODO: Support huge pages. */ | ||||
| 	folio = filemap_grab_folio(inode->i_mapping, index); | ||||
| 	if (IS_ERR(folio)) | ||||
| 		return folio; | ||||
| /*
 | ||||
|  * Process @folio, which contains @gfn, so that the guest can use it. | ||||
|  * The folio must be locked and the gfn must be contained in @slot. | ||||
|  * On successful return the guest sees a zero page so as to avoid | ||||
|  * leaking host data and the up-to-date flag is set. | ||||
|  */ | ||||
| static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, | ||||
| 				  gfn_t gfn, struct folio *folio) | ||||
| { | ||||
| 	unsigned long nr_pages, i; | ||||
| 	pgoff_t index; | ||||
| 	int r; | ||||
| 
 | ||||
| 	nr_pages = folio_nr_pages(folio); | ||||
| 	for (i = 0; i < nr_pages; i++) | ||||
| 		clear_highpage(folio_page(folio, i)); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Use the up-to-date flag to track whether or not the memory has been | ||||
| 	 * zeroed before being handed off to the guest.  There is no backing | ||||
| 	 * storage for the memory, so the folio will remain up-to-date until | ||||
| 	 * it's removed. | ||||
| 	 * Preparing huge folios should always be safe, since it should | ||||
| 	 * be possible to split them later if needed. | ||||
| 	 * | ||||
| 	 * TODO: Skip clearing pages when trusted firmware will do it when | ||||
| 	 * assigning memory to the guest. | ||||
| 	 * Right now the folio order is always going to be zero, but the | ||||
| 	 * code is ready for huge folios.  The only assumption is that | ||||
| 	 * the base pgoff of memslots is naturally aligned with the | ||||
| 	 * requested page order, ensuring that huge folios can also use | ||||
| 	 * huge page table entries for GPA->HPA mapping. | ||||
| 	 * | ||||
| 	 * The order will be passed when creating the guest_memfd, and | ||||
| 	 * checked when creating memslots. | ||||
| 	 */ | ||||
| 	if (!folio_test_uptodate(folio)) { | ||||
| 		unsigned long nr_pages = folio_nr_pages(folio); | ||||
| 		unsigned long i; | ||||
| 	WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); | ||||
| 	index = gfn - slot->base_gfn + slot->gmem.pgoff; | ||||
| 	index = ALIGN_DOWN(index, 1 << folio_order(folio)); | ||||
| 	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); | ||||
| 	if (!r) | ||||
| 		kvm_gmem_mark_prepared(folio); | ||||
| 
 | ||||
| 		for (i = 0; i < nr_pages; i++) | ||||
| 			clear_highpage(folio_page(folio, i)); | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| 		folio_mark_uptodate(folio); | ||||
| 	} | ||||
| 
 | ||||
| 	if (prepare) { | ||||
| 		int r =	kvm_gmem_prepare_folio(inode, index, folio); | ||||
| 		if (r < 0) { | ||||
| 			folio_unlock(folio); | ||||
| 			folio_put(folio); | ||||
| 			return ERR_PTR(r); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Ignore accessed, referenced, and dirty flags.  The memory is | ||||
| 	 * unevictable and there is no storage to write back to. | ||||
| 	 */ | ||||
| 	return folio; | ||||
| /*
 | ||||
|  * Returns a locked folio on success.  The caller is responsible for | ||||
|  * setting the up-to-date flag before the memory is mapped into the guest. | ||||
|  * There is no backing storage for the memory, so the folio will remain | ||||
|  * up-to-date until it's removed. | ||||
|  * | ||||
|  * Ignore accessed, referenced, and dirty flags.  The memory is | ||||
|  * unevictable and there is no storage to write back to. | ||||
|  */ | ||||
| static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) | ||||
| { | ||||
| 	/* TODO: Support huge pages. */ | ||||
| 	return filemap_grab_folio(inode->i_mapping, index); | ||||
| } | ||||
| 
 | ||||
| static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, | ||||
|  | @ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) | |||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		folio = kvm_gmem_get_folio(inode, index, true); | ||||
| 		folio = kvm_gmem_get_folio(inode, index); | ||||
| 		if (IS_ERR(folio)) { | ||||
| 			r = PTR_ERR(folio); | ||||
| 			break; | ||||
|  | @ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol | |||
| 	return MF_DELAYED; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
| static void kvm_gmem_free_folio(struct folio *folio) | ||||
| { | ||||
| 	struct page *page = folio_page(folio, 0); | ||||
|  | @ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = { | |||
| 	.dirty_folio = noop_dirty_folio, | ||||
| 	.migrate_folio	= kvm_gmem_migrate_folio, | ||||
| 	.error_remove_folio = kvm_gmem_error_folio, | ||||
| #ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE | ||||
| #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE | ||||
| 	.free_folio = kvm_gmem_free_folio, | ||||
| #endif | ||||
| }; | ||||
|  | @ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) | |||
| 	fput(file); | ||||
| } | ||||
| 
 | ||||
| static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, | ||||
| 		       gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) | ||||
| /* Returns a locked folio on success.  */ | ||||
| static struct folio * | ||||
| __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, | ||||
| 		   gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, | ||||
| 		   int *max_order) | ||||
| { | ||||
| 	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; | ||||
| 	struct kvm_gmem *gmem = file->private_data; | ||||
| 	struct folio *folio; | ||||
| 	struct page *page; | ||||
| 	int r; | ||||
| 
 | ||||
| 	if (file != slot->gmem.file) { | ||||
| 		WARN_ON_ONCE(slot->gmem.file); | ||||
| 		return -EFAULT; | ||||
| 		return ERR_PTR(-EFAULT); | ||||
| 	} | ||||
| 
 | ||||
| 	gmem = file->private_data; | ||||
| 	if (xa_load(&gmem->bindings, index) != slot) { | ||||
| 		WARN_ON_ONCE(xa_load(&gmem->bindings, index)); | ||||
| 		return -EIO; | ||||
| 		return ERR_PTR(-EIO); | ||||
| 	} | ||||
| 
 | ||||
| 	folio = kvm_gmem_get_folio(file_inode(file), index, prepare); | ||||
| 	folio = kvm_gmem_get_folio(file_inode(file), index); | ||||
| 	if (IS_ERR(folio)) | ||||
| 		return PTR_ERR(folio); | ||||
| 		return folio; | ||||
| 
 | ||||
| 	if (folio_test_hwpoison(folio)) { | ||||
| 		folio_unlock(folio); | ||||
| 		folio_put(folio); | ||||
| 		return -EHWPOISON; | ||||
| 		return ERR_PTR(-EHWPOISON); | ||||
| 	} | ||||
| 
 | ||||
| 	page = folio_file_page(folio, index); | ||||
| 
 | ||||
| 	*pfn = page_to_pfn(page); | ||||
| 	*pfn = folio_file_pfn(folio, index); | ||||
| 	if (max_order) | ||||
| 		*max_order = 0; | ||||
| 
 | ||||
| 	r = 0; | ||||
| 
 | ||||
| 	folio_unlock(folio); | ||||
| 
 | ||||
| 	return r; | ||||
| 	*is_prepared = folio_test_uptodate(folio); | ||||
| 	return folio; | ||||
| } | ||||
| 
 | ||||
| int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, | ||||
| 		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order) | ||||
| { | ||||
| 	struct file *file = kvm_gmem_get_file(slot); | ||||
| 	int r; | ||||
| 	struct folio *folio; | ||||
| 	bool is_prepared = false; | ||||
| 	int r = 0; | ||||
| 
 | ||||
| 	if (!file) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); | ||||
| 	folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); | ||||
| 	if (IS_ERR(folio)) { | ||||
| 		r = PTR_ERR(folio); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!is_prepared) | ||||
| 		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); | ||||
| 
 | ||||
| 	folio_unlock(folio); | ||||
| 	if (r < 0) | ||||
| 		folio_put(folio); | ||||
| 
 | ||||
| out: | ||||
| 	fput(file); | ||||
| 	return r; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); | ||||
| 
 | ||||
| #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM | ||||
| long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, | ||||
| 		       kvm_gmem_populate_cb post_populate, void *opaque) | ||||
| { | ||||
|  | @ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long | |||
| 
 | ||||
| 	npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); | ||||
| 	for (i = 0; i < npages; i += (1 << max_order)) { | ||||
| 		struct folio *folio; | ||||
| 		gfn_t gfn = start_gfn + i; | ||||
| 		bool is_prepared = false; | ||||
| 		kvm_pfn_t pfn; | ||||
| 
 | ||||
| 		if (signal_pending(current)) { | ||||
|  | @ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long | |||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); | ||||
| 		if (ret) | ||||
| 		folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); | ||||
| 		if (IS_ERR(folio)) { | ||||
| 			ret = PTR_ERR(folio); | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		if (!IS_ALIGNED(gfn, (1 << max_order)) || | ||||
| 		    (npages - i) < (1 << max_order)) | ||||
| 			max_order = 0; | ||||
| 		if (is_prepared) { | ||||
| 			folio_unlock(folio); | ||||
| 			folio_put(folio); | ||||
| 			ret = -EEXIST; | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		folio_unlock(folio); | ||||
| 		WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || | ||||
| 			(npages - i) < (1 << max_order)); | ||||
| 
 | ||||
| 		ret = -EINVAL; | ||||
| 		while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), | ||||
| 							KVM_MEMORY_ATTRIBUTE_PRIVATE, | ||||
| 							KVM_MEMORY_ATTRIBUTE_PRIVATE)) { | ||||
| 			if (!max_order) | ||||
| 				goto put_folio_and_exit; | ||||
| 			max_order--; | ||||
| 		} | ||||
| 
 | ||||
| 		p = src ? src + i * PAGE_SIZE : NULL; | ||||
| 		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); | ||||
| 		if (!ret) | ||||
| 			kvm_gmem_mark_prepared(folio); | ||||
| 
 | ||||
| 		put_page(pfn_to_page(pfn)); | ||||
| put_folio_and_exit: | ||||
| 		folio_put(folio); | ||||
| 		if (ret) | ||||
| 			break; | ||||
| 	} | ||||
|  | @ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long | |||
| 	return ret && !i ? ret : i; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_gmem_populate); | ||||
| #endif | ||||
|  |  | |||
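
The guest_memfd changes above funnel first-time accesses through kvm_gmem_prepare_folio(), which zeroes the folio, calls kvm_arch_gmem_prepare() and only then marks it prepared via the up-to-date flag. A skeleton of the vendor hook on the receiving end is sketched below; it is an invented placeholder, not the real SEV-SNP hook.

    #include <linux/kvm_host.h>

    /*
     * Hypothetical vendor hook reached from __kvm_gmem_prepare_folio()
     * through kvm_arch_gmem_prepare().  By the time it runs, common code
     * holds the folio lock and has already cleared the pages; for SEV-SNP
     * the real hook updates the RMP so the pages become guest-owned.
     */
    static int demo_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn,
                                 int max_order)
    {
        return 0;
    }
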
|  | @ -2398,42 +2398,6 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, | |||
| #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ | ||||
| 
 | ||||
| #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES | ||||
| /*
 | ||||
|  * Returns true if _all_ gfns in the range [@start, @end) have attributes | ||||
|  * matching @attrs. | ||||
|  */ | ||||
| bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, | ||||
| 				     unsigned long attrs) | ||||
| { | ||||
| 	XA_STATE(xas, &kvm->mem_attr_array, start); | ||||
| 	unsigned long index; | ||||
| 	bool has_attrs; | ||||
| 	void *entry; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 
 | ||||
| 	if (!attrs) { | ||||
| 		has_attrs = !xas_find(&xas, end - 1); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	has_attrs = true; | ||||
| 	for (index = start; index < end; index++) { | ||||
| 		do { | ||||
| 			entry = xas_next(&xas); | ||||
| 		} while (xas_retry(&xas, entry)); | ||||
| 
 | ||||
| 		if (xas.xa_index != index || xa_to_value(entry) != attrs) { | ||||
| 			has_attrs = false; | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	rcu_read_unlock(); | ||||
| 	return has_attrs; | ||||
| } | ||||
| 
 | ||||
| static u64 kvm_supported_mem_attributes(struct kvm *kvm) | ||||
| { | ||||
| 	if (!kvm || kvm_arch_has_private_mem(kvm)) | ||||
|  | @ -2442,6 +2406,41 @@ static u64 kvm_supported_mem_attributes(struct kvm *kvm) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Returns true if _all_ gfns in the range [@start, @end) have attributes | ||||
|  * such that the bits in @mask match @attrs. | ||||
|  */ | ||||
| bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, | ||||
| 				     unsigned long mask, unsigned long attrs) | ||||
| { | ||||
| 	XA_STATE(xas, &kvm->mem_attr_array, start); | ||||
| 	unsigned long index; | ||||
| 	void *entry; | ||||
| 
 | ||||
| 	mask &= kvm_supported_mem_attributes(kvm); | ||||
| 	if (attrs & ~mask) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (end == start + 1) | ||||
| 		return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; | ||||
| 
 | ||||
| 	guard(rcu)(); | ||||
| 	if (!attrs) | ||||
| 		return !xas_find(&xas, end - 1); | ||||
| 
 | ||||
| 	for (index = start; index < end; index++) { | ||||
| 		do { | ||||
| 			entry = xas_next(&xas); | ||||
| 		} while (xas_retry(&xas, entry)); | ||||
| 
 | ||||
| 		if (xas.xa_index != index || | ||||
| 		    (xa_to_value(entry) & mask) != attrs) | ||||
| 			return false; | ||||
| 	} | ||||
| 
 | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, | ||||
| 						 struct kvm_mmu_notifier_range *range) | ||||
| { | ||||
|  | @ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, | |||
| 	mutex_lock(&kvm->slots_lock); | ||||
| 
 | ||||
| 	/* Nothing to do if the entire range as the desired attributes. */ | ||||
| 	if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) | ||||
| 	if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	/*
 | ||||
|  |  | |||
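
kvm_range_has_memory_attributes() now takes a mask as well as the expected attribute values, so callers can test a subset of bits. The two patterns used elsewhere in this diff look like this (wrapped in a throwaway function so the snippet stands on its own):

    #include <linux/kvm_host.h>

    static bool demo_attribute_checks(struct kvm *kvm, gfn_t start, gfn_t end,
                                      unsigned long attrs)
    {
        /* All gfns must have the PRIVATE bit set; other bits are ignored. */
        bool all_private =
            kvm_range_has_memory_attributes(kvm, start, end,
                                            KVM_MEMORY_ATTRIBUTE_PRIVATE,
                                            KVM_MEMORY_ATTRIBUTE_PRIVATE);

        /* All gfns must match @attrs on every bit, as the old code required. */
        bool exact = kvm_range_has_memory_attributes(kvm, start, end,
                                                     ~0, attrs);

        return all_private && exact;
    }
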
Paolo Bonzini