summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:27 +0000
commit34996e42f82bfd60bc2c191e5cae3c6ab233ec6c (patch)
tree62db60558cbf089714b48daeabca82bf2b20b20e /virt/kvm
parentAdding debian version 6.8.12-1. (diff)
downloadlinux-34996e42f82bfd60bc2c191e5cae3c6ab233ec6c.tar.xz
linux-34996e42f82bfd60bc2c191e5cae3c6ab233ec6c.zip
Merging upstream version 6.9.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig7
-rw-r--r--virt/kvm/async_pf.c44
-rw-r--r--virt/kvm/guest_memfd.c5
-rw-r--r--virt/kvm/kvm_main.c45
-rw-r--r--virt/kvm/kvm_mm.h6
-rw-r--r--virt/kvm/pfncache.c277
6 files changed, 230 insertions, 154 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 184dab4ee8..29b73eedfe 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -1,9 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# KVM common configuration items and defaults
-config HAVE_KVM
- bool
-
config KVM_COMMON
bool
select EVENTFD
@@ -55,6 +52,9 @@ config KVM_ASYNC_PF_SYNC
config HAVE_KVM_MSI
bool
+config HAVE_KVM_READONLY_MEM
+ bool
+
config HAVE_KVM_CPU_RELAX_INTERCEPT
bool
@@ -73,6 +73,7 @@ config KVM_COMPAT
config HAVE_KVM_IRQ_BYPASS
bool
+ select IRQ_BYPASS_MANAGER
config HAVE_KVM_VCPU_ASYNC_IOCTL
bool
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 28658b9e0d..99a63bad03 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
{
struct kvm_async_pf *apf =
container_of(work, struct kvm_async_pf, work);
- struct mm_struct *mm = apf->mm;
struct kvm_vcpu *vcpu = apf->vcpu;
+ struct mm_struct *mm = vcpu->kvm->mm;
unsigned long addr = apf->addr;
gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
/*
- * This work is run asynchronously to the task which owns
- * mm and might be done in another context, so we must
- * access remotely.
+ * Attempt to pin the VM's host address space, and simply skip gup() if
+ * acquiring a pin fail, i.e. if the process is exiting. Note, KVM
+ * holds a reference to its associated mm_struct until the very end of
+ * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+ * work item is fully processed.
*/
- mmap_read_lock(mm);
- get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
- if (locked)
- mmap_read_unlock(mm);
+ if (mmget_not_zero(mm)) {
+ mmap_read_lock(mm);
+ get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+ if (locked)
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
+ /*
+ * Notify and kick the vCPU even if faulting in the page failed, e.g.
+ * so that the vCPU can retry the fault synchronously.
+ */
if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
kvm_arch_async_page_present(vcpu, apf);
@@ -74,19 +83,18 @@ static void async_pf_execute(struct work_struct *work)
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);
- if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
- kvm_arch_async_page_present_queued(vcpu);
-
/*
- * apf may be freed by kvm_check_async_pf_completion() after
- * this point
+ * The apf struct may be freed by kvm_check_async_pf_completion() as
+ * soon as the lock is dropped. Nullify it to prevent improper usage.
*/
+ apf = NULL;
+
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+ kvm_arch_async_page_present_queued(vcpu);
trace_kvm_async_pf_completed(addr, cr2_or_gpa);
__kvm_vcpu_wake_up(vcpu);
-
- mmput(mm);
}
static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
@@ -132,10 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
- if (cancel_work_sync(&work->work)) {
- mmput(work->mm);
+ if (cancel_work_sync(&work->work))
kmem_cache_free(async_pf_cache, work);
- }
#endif
spin_lock(&vcpu->async_pf.lock);
}
@@ -206,8 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
work->cr2_or_gpa = cr2_or_gpa;
work->addr = hva;
work->arch = *arch;
- work->mm = current->mm;
- mmget(work->mm);
INIT_WORK(&work->work, async_pf_execute);
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 0f4e0cf4f1..747fe251e4 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -510,8 +510,10 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
}
if (folio_test_hwpoison(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
r = -EHWPOISON;
- goto out_unlock;
+ goto out_fput;
}
page = folio_file_page(folio, index);
@@ -522,7 +524,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
r = 0;
-out_unlock:
folio_unlock(folio);
out_fput:
fput(file);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0f50960b0e..d9ce063c76 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -421,7 +421,7 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
if (WARN_ON_ONCE(!capacity))
return -EIO;
- mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp);
+ mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
if (!mc->objects)
return -ENOMEM;
@@ -832,8 +832,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* mn_active_invalidate_count (see above) instead of
* mmu_invalidate_in_progress.
*/
- gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
- hva_range.may_block);
+ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
/*
* If one or more memslots were found and thus zapped, notify arch code
@@ -890,7 +889,9 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
/* Pairs with the increment in range_start(). */
spin_lock(&kvm->mn_invalidate_lock);
- wake = (--kvm->mn_active_invalidate_count == 0);
+ if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
+ --kvm->mn_active_invalidate_count;
+ wake = !kvm->mn_active_invalidate_count;
spin_unlock(&kvm->mn_invalidate_lock);
/*
@@ -1150,10 +1151,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
&stat_fops_per_vm);
}
- ret = kvm_arch_create_vm_debugfs(kvm);
- if (ret)
- goto out_err;
-
+ kvm_arch_create_vm_debugfs(kvm);
return 0;
out_err:
kvm_destroy_vm_debugfs(kvm);
@@ -1183,9 +1181,8 @@ void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
* Cleanup should be automatic done in kvm_destroy_vm_debugfs() recursively, so
* a per-arch destroy interface is not needed.
*/
-int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
- return 0;
}
static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
@@ -1614,7 +1611,7 @@ static int check_memory_region_flags(struct kvm *kvm,
if (mem->flags & KVM_MEM_GUEST_MEMFD)
valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
-#ifdef __KVM_HAVE_READONLY_MEM
+#ifdef CONFIG_HAVE_KVM_READONLY_MEM
/*
* GUEST_MEMFD is incompatible with read-only memslots, as writes to
* read-only memslots have emulated MMIO, not page fault, semantics,
@@ -4048,6 +4045,18 @@ static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
return false;
}
+/*
+ * By default, simply query the target vCPU's current mode when checking if a
+ * vCPU was preempted in kernel mode. All architectures except x86 (or more
+ * specifical, except VMX) allow querying whether or not a vCPU is in kernel
+ * mode even if the vCPU is NOT loaded, i.e. using kvm_arch_vcpu_in_kernel()
+ * directly for cross-vCPU checks is functionally correct and accurate.
+ */
+bool __weak kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_in_kernel(vcpu);
+}
+
bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return false;
@@ -4057,12 +4066,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
struct kvm_vcpu *vcpu;
- int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+ int last_boosted_vcpu;
unsigned long i;
int yielded = 0;
int try = 3;
int pass;
+ last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu);
kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
@@ -4084,16 +4094,23 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
continue;
+
+ /*
+ * Treat the target vCPU as being in-kernel if it has a
+ * pending interrupt, as the vCPU trying to yield may
+ * be spinning waiting on IPI delivery, i.e. the target
+ * vCPU is in-kernel for the purposes of directed yield.
+ */
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_dy_has_pending_interrupt(vcpu) &&
- !kvm_arch_vcpu_in_kernel(vcpu))
+ !kvm_arch_vcpu_preempted_in_kernel(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
yielded = kvm_vcpu_yield_to(vcpu);
if (yielded > 0) {
- kvm->last_boosted_vcpu = i;
+ WRITE_ONCE(kvm->last_boosted_vcpu, i);
break;
} else if (yielded < 0) {
try--;
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index ecefc7ec51..715f19669d 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -26,13 +26,11 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
#ifdef CONFIG_HAVE_KVM_PFNCACHE
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block);
+ unsigned long end);
#else
static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block)
+ unsigned long end)
{
}
#endif /* HAVE_KVM_PFNCACHE */
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 2d6aba6778..e3453e869e 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -23,57 +23,51 @@
* MMU notifier 'invalidate_range_start' hook.
*/
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
- unsigned long end, bool may_block)
+ unsigned long end)
{
- DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
struct gfn_to_pfn_cache *gpc;
- bool evict_vcpus = false;
spin_lock(&kvm->gpc_lock);
list_for_each_entry(gpc, &kvm->gpc_list, list) {
- write_lock_irq(&gpc->lock);
+ read_lock_irq(&gpc->lock);
/* Only a single page so no need to care about length */
if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
gpc->uhva >= start && gpc->uhva < end) {
- gpc->valid = false;
+ read_unlock_irq(&gpc->lock);
/*
- * If a guest vCPU could be using the physical address,
- * it needs to be forced out of guest mode.
+ * There is a small window here where the cache could
+ * be modified, and invalidation would no longer be
+ * necessary. Hence check again whether invalidation
+ * is still necessary once the write lock has been
+ * acquired.
*/
- if (gpc->usage & KVM_GUEST_USES_PFN) {
- if (!evict_vcpus) {
- evict_vcpus = true;
- bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
- }
- __set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
- }
+
+ write_lock_irq(&gpc->lock);
+ if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+ gpc->uhva >= start && gpc->uhva < end)
+ gpc->valid = false;
+ write_unlock_irq(&gpc->lock);
+ continue;
}
- write_unlock_irq(&gpc->lock);
+
+ read_unlock_irq(&gpc->lock);
}
spin_unlock(&kvm->gpc_lock);
+}
- if (evict_vcpus) {
- /*
- * KVM needs to ensure the vCPU is fully out of guest context
- * before allowing the invalidation to continue.
- */
- unsigned int req = KVM_REQ_OUTSIDE_GUEST_MODE;
- bool called;
-
- /*
- * If the OOM reaper is active, then all vCPUs should have
- * been stopped already, so perform the request without
- * KVM_REQUEST_WAIT and be sad if any needed to be IPI'd.
- */
- if (!may_block)
- req &= ~KVM_REQUEST_WAIT;
-
- called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);
+static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva,
+ unsigned long len)
+{
+ unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+ offset_in_page(gpa);
- WARN_ON_ONCE(called && !may_block);
- }
+ /*
+ * The cached access must fit within a single page. The 'len' argument
+ * to activate() and refresh() exists only to enforce that.
+ */
+ return offset + len <= PAGE_SIZE;
}
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
@@ -83,10 +77,17 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
if (!gpc->active)
return false;
- if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+ /*
+ * If the page was cached from a memslot, make sure the memslots have
+ * not been re-configured.
+ */
+ if (!kvm_is_error_gpa(gpc->gpa) && gpc->generation != slots->generation)
return false;
- if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
+ if (kvm_is_error_hva(gpc->uhva))
+ return false;
+
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
return false;
if (!gpc->valid)
@@ -94,19 +95,33 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
return true;
}
-EXPORT_SYMBOL_GPL(kvm_gpc_check);
-static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
+static void *gpc_map(kvm_pfn_t pfn)
{
- /* Unmap the old pfn/page if it was mapped before. */
- if (!is_error_noslot_pfn(pfn) && khva) {
- if (pfn_valid(pfn))
- kunmap(pfn_to_page(pfn));
+ if (pfn_valid(pfn))
+ return kmap(pfn_to_page(pfn));
+
#ifdef CONFIG_HAS_IOMEM
- else
- memunmap(khva);
+ return memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+#else
+ return NULL;
#endif
+}
+
+static void gpc_unmap(kvm_pfn_t pfn, void *khva)
+{
+ /* Unmap the old pfn/page if it was mapped before. */
+ if (is_error_noslot_pfn(pfn) || !khva)
+ return;
+
+ if (pfn_valid(pfn)) {
+ kunmap(pfn_to_page(pfn));
+ return;
}
+
+#ifdef CONFIG_HAS_IOMEM
+ memunmap(khva);
+#endif
}
static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
@@ -140,7 +155,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
- void *old_khva = gpc->khva - offset_in_page(gpc->khva);
+ void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
void *new_khva = NULL;
unsigned long mmu_seq;
@@ -175,7 +190,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
* the existing mapping and didn't create a new one.
*/
if (new_khva != old_khva)
- gpc_unmap_khva(new_pfn, new_khva);
+ gpc_unmap(new_pfn, new_khva);
kvm_release_pfn_clean(new_pfn);
@@ -192,20 +207,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
* pfn. Note, kmap() and memremap() can both sleep, so this
* too must be done outside of gpc->lock!
*/
- if (gpc->usage & KVM_HOST_USES_PFN) {
- if (new_pfn == gpc->pfn) {
- new_khva = old_khva;
- } else if (pfn_valid(new_pfn)) {
- new_khva = kmap(pfn_to_page(new_pfn));
-#ifdef CONFIG_HAS_IOMEM
- } else {
- new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
-#endif
- }
- if (!new_khva) {
- kvm_release_pfn_clean(new_pfn);
- goto out_error;
- }
+ if (new_pfn == gpc->pfn)
+ new_khva = old_khva;
+ else
+ new_khva = gpc_map(new_pfn);
+
+ if (!new_khva) {
+ kvm_release_pfn_clean(new_pfn);
+ goto out_error;
}
write_lock_irq(&gpc->lock);
@@ -219,7 +228,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
gpc->valid = true;
gpc->pfn = new_pfn;
- gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK);
+ gpc->khva = new_khva + offset_in_page(gpc->uhva);
/*
* Put the reference to the _new_ pfn. The pfn is now tracked by the
@@ -236,30 +245,21 @@ out_error:
return -EFAULT;
}
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
- unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva)
{
- struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
- unsigned long page_offset = gpa & ~PAGE_MASK;
+ unsigned long page_offset;
bool unmap_old = false;
unsigned long old_uhva;
kvm_pfn_t old_pfn;
+ bool hva_change = false;
void *old_khva;
int ret;
- /*
- * If must fit within a single page. The 'len' argument is
- * only to enforce that.
- */
- if (page_offset + len > PAGE_SIZE)
+ /* Either gpa or uhva must be valid, but not both */
+ if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
return -EINVAL;
- /*
- * If another task is refreshing the cache, wait for it to complete.
- * There is no guarantee that concurrent refreshes will see the same
- * gpa, memslots generation, etc..., so they must be fully serialized.
- */
- mutex_lock(&gpc->refresh_lock);
+ lockdep_assert_held(&gpc->refresh_lock);
write_lock_irq(&gpc->lock);
@@ -269,30 +269,56 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
}
old_pfn = gpc->pfn;
- old_khva = gpc->khva - offset_in_page(gpc->khva);
- old_uhva = gpc->uhva;
-
- /* If the userspace HVA is invalid, refresh that first */
- if (gpc->gpa != gpa || gpc->generation != slots->generation ||
- kvm_is_error_hva(gpc->uhva)) {
- gfn_t gfn = gpa_to_gfn(gpa);
-
- gpc->gpa = gpa;
- gpc->generation = slots->generation;
- gpc->memslot = __gfn_to_memslot(slots, gfn);
- gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
-
- if (kvm_is_error_hva(gpc->uhva)) {
- ret = -EFAULT;
- goto out;
+ old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
+ old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
+
+ if (kvm_is_error_gpa(gpa)) {
+ page_offset = offset_in_page(uhva);
+
+ gpc->gpa = INVALID_GPA;
+ gpc->memslot = NULL;
+ gpc->uhva = PAGE_ALIGN_DOWN(uhva);
+
+ if (gpc->uhva != old_uhva)
+ hva_change = true;
+ } else {
+ struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
+
+ page_offset = offset_in_page(gpa);
+
+ if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+ kvm_is_error_hva(gpc->uhva)) {
+ gfn_t gfn = gpa_to_gfn(gpa);
+
+ gpc->gpa = gpa;
+ gpc->generation = slots->generation;
+ gpc->memslot = __gfn_to_memslot(slots, gfn);
+ gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
+
+ if (kvm_is_error_hva(gpc->uhva)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * Even if the GPA and/or the memslot generation changed, the
+ * HVA may still be the same.
+ */
+ if (gpc->uhva != old_uhva)
+ hva_change = true;
+ } else {
+ gpc->uhva = old_uhva;
}
}
+ /* Note: the offset must be correct before calling hva_to_pfn_retry() */
+ gpc->uhva += page_offset;
+
/*
* If the userspace HVA changed or the PFN was already invalid,
* drop the lock and do the HVA to PFN lookup again.
*/
- if (!gpc->valid || old_uhva != gpc->uhva) {
+ if (!gpc->valid || hva_change) {
ret = hva_to_pfn_retry(gpc);
} else {
/*
@@ -323,41 +349,53 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
out_unlock:
write_unlock_irq(&gpc->lock);
- mutex_unlock(&gpc->refresh_lock);
-
if (unmap_old)
- gpc_unmap_khva(old_pfn, old_khva);
+ gpc_unmap(old_pfn, old_khva);
return ret;
}
int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
- return __kvm_gpc_refresh(gpc, gpc->gpa, len);
+ unsigned long uhva;
+
+ guard(mutex)(&gpc->refresh_lock);
+
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
+ return -EINVAL;
+
+ /*
+ * If the GPA is valid then ignore the HVA, as a cache can be GPA-based
+ * or HVA-based, not both. For GPA-based caches, the HVA will be
+ * recomputed during refresh if necessary.
+ */
+ uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
+
+ return __kvm_gpc_refresh(gpc, gpc->gpa, uhva);
}
-EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
{
- WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
- WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
-
rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);
gpc->kvm = kvm;
- gpc->vcpu = vcpu;
- gpc->usage = usage;
gpc->pfn = KVM_PFN_ERR_FAULT;
+ gpc->gpa = INVALID_GPA;
gpc->uhva = KVM_HVA_ERR_BAD;
+ gpc->active = gpc->valid = false;
}
-EXPORT_SYMBOL_GPL(kvm_gpc_init);
-int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
+ unsigned long len)
{
struct kvm *kvm = gpc->kvm;
+ if (!kvm_gpc_is_valid_len(gpa, uhva, len))
+ return -EINVAL;
+
+ guard(mutex)(&gpc->refresh_lock);
+
if (!gpc->active) {
if (KVM_BUG_ON(gpc->valid, kvm))
return -EIO;
@@ -375,9 +413,25 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
- return __kvm_gpc_refresh(gpc, gpa, len);
+ return __kvm_gpc_refresh(gpc, gpa, uhva);
+}
+
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+{
+ /*
+ * Explicitly disallow INVALID_GPA so that the magic value can be used
+ * by KVM to differentiate between GPA-based and HVA-based caches.
+ */
+ if (WARN_ON_ONCE(kvm_is_error_gpa(gpa)))
+ return -EINVAL;
+
+ return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
+}
+
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsigned long len)
+{
+ return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
}
-EXPORT_SYMBOL_GPL(kvm_gpc_activate);
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
{
@@ -385,6 +439,8 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
kvm_pfn_t old_pfn;
void *old_khva;
+ guard(mutex)(&gpc->refresh_lock);
+
if (gpc->active) {
/*
* Deactivate the cache before removing it from the list, KVM
@@ -412,7 +468,6 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
list_del(&gpc->list);
spin_unlock(&kvm->gpc_lock);
- gpc_unmap_khva(old_pfn, old_khva);
+ gpc_unmap(old_pfn, old_khva);
}
}
-EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);