Skip to content

Commit 2a62345

Browse files
mdroth authored and sean-jc committed
KVM: guest_memfd: GUP source pages prior to populating guest memory
Currently the post-populate callbacks handle copying source pages into private GPA ranges backed by guest_memfd, where kvm_gmem_populate() acquires the filemap invalidate lock, then calls a post-populate callback which may issue a get_user_pages() on the source pages prior to copying them into the private GPA (e.g. TDX). This will not be compatible with in-place conversion, where the userspace page fault path will attempt to acquire the filemap invalidate lock while holding the mm->mmap_lock, leading to a potential ABBA deadlock. Address this by hoisting the GUP above the filemap invalidate lock so that these page faults path can be taken early, prior to acquiring the filemap invalidate lock. It's not currently clear whether this issue is reachable with the current implementation of guest_memfd, which doesn't support in-place conversion, however it does provide a consistent mechanism to provide stable source/target PFNs to callbacks rather than punting to vendor-specific code, which allows for more commonality across architectures, which may be worthwhile even without in-place conversion. As part of this change, also begin enforcing that the 'src' argument to kvm_gmem_populate() must be page-aligned, as this greatly reduces the complexity around how the post-populate callbacks are implemented, and since no current in-tree users support using a non-page-aligned 'src' argument. 
Suggested-by: Sean Christopherson <seanjc@google.com> Co-developed-by: Sean Christopherson <seanjc@google.com> Co-developed-by: Vishal Annapurve <vannapurve@google.com> Signed-off-by: Vishal Annapurve <vannapurve@google.com> Tested-by: Vishal Annapurve <vannapurve@google.com> Tested-by: Kai Huang <kai.huang@intel.com> Signed-off-by: Michael Roth <michael.roth@amd.com> Tested-by: Yan Zhao <yan.y.zhao@intel.com> Reviewed-by: Yan Zhao <yan.y.zhao@intel.com> Link: https://patch.msgid.link/20260108214622.1084057-7-michael.roth@amd.com [sean: avoid local "p" variable] Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 189fd1b commit 2a62345

File tree

4 files changed

+78
-58
lines changed

4 files changed

+78
-58
lines changed

arch/x86/kvm/svm/sev.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,7 +2268,7 @@ struct sev_gmem_populate_args {
22682268
};
22692269

22702270
static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
2271-
void __user *src, void *opaque)
2271+
struct page *src_page, void *opaque)
22722272
{
22732273
struct sev_gmem_populate_args *sev_populate_args = opaque;
22742274
struct sev_data_snp_launch_update fw_args = {0};
@@ -2277,7 +2277,7 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
22772277
int level;
22782278
int ret;
22792279

2280-
if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src))
2280+
if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src_page))
22812281
return -EINVAL;
22822282

22832283
ret = snp_lookup_rmpentry((u64)pfn, &assigned, &level);
@@ -2288,15 +2288,14 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
22882288
goto out;
22892289
}
22902290

2291-
if (src) {
2292-
void *vaddr = kmap_local_pfn(pfn);
2291+
if (src_page) {
2292+
void *src_vaddr = kmap_local_page(src_page);
2293+
void *dst_vaddr = kmap_local_pfn(pfn);
22932294

2294-
if (copy_from_user(vaddr, src, PAGE_SIZE)) {
2295-
kunmap_local(vaddr);
2296-
ret = -EFAULT;
2297-
goto out;
2298-
}
2299-
kunmap_local(vaddr);
2295+
memcpy(dst_vaddr, src_vaddr, PAGE_SIZE);
2296+
2297+
kunmap_local(src_vaddr);
2298+
kunmap_local(dst_vaddr);
23002299
}
23012300

23022301
ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, PG_LEVEL_4K,
@@ -2326,17 +2325,19 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
23262325
if (ret && !snp_page_reclaim(kvm, pfn) &&
23272326
sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID &&
23282327
sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) {
2329-
void *vaddr = kmap_local_pfn(pfn);
2328+
void *src_vaddr = kmap_local_page(src_page);
2329+
void *dst_vaddr = kmap_local_pfn(pfn);
23302330

2331-
if (copy_to_user(src, vaddr, PAGE_SIZE))
2332-
pr_debug("Failed to write CPUID page back to userspace\n");
2331+
memcpy(src_vaddr, dst_vaddr, PAGE_SIZE);
23332332

2334-
kunmap_local(vaddr);
2333+
kunmap_local(src_vaddr);
2334+
kunmap_local(dst_vaddr);
23352335
}
23362336

23372337
out:
2338-
pr_debug("%s: exiting with return code %d (fw_error %d)\n",
2339-
__func__, ret, sev_populate_args->fw_error);
2338+
if (ret)
2339+
pr_debug("%s: error updating GFN %llx, return code %d (fw_error %d)\n",
2340+
__func__, gfn, ret, sev_populate_args->fw_error);
23402341
return ret;
23412342
}
23422343

arch/x86/kvm/vmx/tdx.c

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,34 +3118,24 @@ struct tdx_gmem_post_populate_arg {
31183118
};
31193119

31203120
static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
3121-
void __user *src, void *_arg)
3121+
struct page *src_page, void *_arg)
31223122
{
31233123
struct tdx_gmem_post_populate_arg *arg = _arg;
31243124
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
31253125
u64 err, entry, level_state;
31263126
gpa_t gpa = gfn_to_gpa(gfn);
3127-
struct page *src_page;
31283127
int ret, i;
31293128

31303129
if (KVM_BUG_ON(kvm_tdx->page_add_src, kvm))
31313130
return -EIO;
31323131

3133-
/*
3134-
* Get the source page if it has been faulted in. Return failure if the
3135-
* source page has been swapped out or unmapped in primary memory.
3136-
*/
3137-
ret = get_user_pages_fast((unsigned long)src, 1, 0, &src_page);
3138-
if (ret < 0)
3139-
return ret;
3140-
if (ret != 1)
3141-
return -ENOMEM;
3132+
if (!src_page)
3133+
return -EOPNOTSUPP;
31423134

31433135
kvm_tdx->page_add_src = src_page;
31443136
ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn);
31453137
kvm_tdx->page_add_src = NULL;
31463138

3147-
put_page(src_page);
3148-
31493139
if (ret || !(arg->flags & KVM_TDX_MEASURE_MEMORY_REGION))
31503140
return ret;
31513141

include/linux/kvm_host.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,7 +2566,7 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
25662566
* @gfn: starting GFN to be populated
25672567
* @src: userspace-provided buffer containing data to copy into GFN range
25682568
* (passed to @post_populate, and incremented on each iteration
2569-
* if not NULL)
2569+
* if not NULL). Must be page-aligned.
25702570
* @npages: number of pages to copy from userspace-buffer
25712571
* @post_populate: callback to issue for each gmem page that backs the GPA
25722572
* range
@@ -2581,7 +2581,7 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
25812581
* Returns the number of pages that were populated.
25822582
*/
25832583
typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
2584-
void __user *src, void *opaque);
2584+
struct page *page, void *opaque);
25852585

25862586
long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
25872587
kvm_gmem_populate_cb post_populate, void *opaque);

virt/kvm/guest_memfd.c

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -820,12 +820,48 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
820820
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn);
821821

822822
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
823+
824+
static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
825+
struct file *file, gfn_t gfn, struct page *src_page,
826+
kvm_gmem_populate_cb post_populate, void *opaque)
827+
{
828+
pgoff_t index = kvm_gmem_get_index(slot, gfn);
829+
struct folio *folio;
830+
kvm_pfn_t pfn;
831+
int ret;
832+
833+
filemap_invalidate_lock(file->f_mapping);
834+
835+
folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL);
836+
if (IS_ERR(folio)) {
837+
ret = PTR_ERR(folio);
838+
goto out_unlock;
839+
}
840+
841+
folio_unlock(folio);
842+
843+
if (!kvm_range_has_memory_attributes(kvm, gfn, gfn + 1,
844+
KVM_MEMORY_ATTRIBUTE_PRIVATE,
845+
KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
846+
ret = -EINVAL;
847+
goto out_put_folio;
848+
}
849+
850+
ret = post_populate(kvm, gfn, pfn, src_page, opaque);
851+
if (!ret)
852+
folio_mark_uptodate(folio);
853+
854+
out_put_folio:
855+
folio_put(folio);
856+
out_unlock:
857+
filemap_invalidate_unlock(file->f_mapping);
858+
return ret;
859+
}
860+
823861
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
824862
kvm_gmem_populate_cb post_populate, void *opaque)
825863
{
826864
struct kvm_memory_slot *slot;
827-
void __user *p;
828-
829865
int ret = 0;
830866
long i;
831867

@@ -834,6 +870,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
834870
if (WARN_ON_ONCE(npages <= 0))
835871
return -EINVAL;
836872

873+
if (WARN_ON_ONCE(!PAGE_ALIGNED(src)))
874+
return -EINVAL;
875+
837876
slot = gfn_to_memslot(kvm, start_gfn);
838877
if (!kvm_slot_has_gmem(slot))
839878
return -EINVAL;
@@ -842,47 +881,37 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
842881
if (!file)
843882
return -EFAULT;
844883

845-
filemap_invalidate_lock(file->f_mapping);
846-
847884
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
848885
for (i = 0; i < npages; i++) {
849-
struct folio *folio;
850-
gfn_t gfn = start_gfn + i;
851-
pgoff_t index = kvm_gmem_get_index(slot, gfn);
852-
kvm_pfn_t pfn;
886+
struct page *src_page = NULL;
853887

854888
if (signal_pending(current)) {
855889
ret = -EINTR;
856890
break;
857891
}
858892

859-
folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL);
860-
if (IS_ERR(folio)) {
861-
ret = PTR_ERR(folio);
862-
break;
893+
if (src) {
894+
unsigned long uaddr = (unsigned long)src + i * PAGE_SIZE;
895+
896+
ret = get_user_pages_fast(uaddr, 1, 0, &src_page);
897+
if (ret < 0)
898+
break;
899+
if (ret != 1) {
900+
ret = -ENOMEM;
901+
break;
902+
}
863903
}
864904

865-
folio_unlock(folio);
905+
ret = __kvm_gmem_populate(kvm, slot, file, start_gfn + i, src_page,
906+
post_populate, opaque);
866907

867-
ret = -EINVAL;
868-
if (!kvm_range_has_memory_attributes(kvm, gfn, gfn + 1,
869-
KVM_MEMORY_ATTRIBUTE_PRIVATE,
870-
KVM_MEMORY_ATTRIBUTE_PRIVATE))
871-
goto put_folio_and_exit;
908+
if (src_page)
909+
put_page(src_page);
872910

873-
p = src ? src + i * PAGE_SIZE : NULL;
874-
ret = post_populate(kvm, gfn, pfn, p, opaque);
875-
if (!ret)
876-
folio_mark_uptodate(folio);
877-
878-
put_folio_and_exit:
879-
folio_put(folio);
880911
if (ret)
881912
break;
882913
}
883914

884-
filemap_invalidate_unlock(file->f_mapping);
885-
886915
return ret && !i ? ret : i;
887916
}
888917
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate);

0 commit comments

Comments (0)