summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/mmu/tdp_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_mmu.c')
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c139
1 files changed, 58 insertions, 81 deletions
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 04c1f0957f..36539c1b36 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
- if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
- !is_mmio_spte(new_spte) &&
+ if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) &&
+ !is_mmio_spte(kvm, new_spte) &&
!is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
@@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
kvm_set_pfn_accessed(spte_to_pfn(old_spte));
}
+static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
+{
+ u64 *sptep = rcu_dereference(iter->sptep);
+
+ /*
+ * The caller is responsible for ensuring the old SPTE is not a REMOVED
+ * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+ * and pre-checking before inserting a new SPTE is advantageous as it
+ * avoids unnecessary work.
+ */
+ WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+
+ /*
+ * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
+ * does not hold the mmu_lock. On failure, i.e. if a different logical
+ * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
+ * the current value, so the caller operates on fresh data, e.g. if it
+ * retries tdp_mmu_set_spte_atomic()
+ */
+ if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
+ return -EBUSY;
+
+ return 0;
+}
+
/*
* tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
* and handle the associated bookkeeping. Do not mark the page dirty
@@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
- u64 *sptep = rcu_dereference(iter->sptep);
-
- /*
- * The caller is responsible for ensuring the old SPTE is not a REMOVED
- * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
- * and pre-checking before inserting a new SPTE is advantageous as it
- * avoids unnecessary work.
- */
- WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+ int ret;
lockdep_assert_held_read(&kvm->mmu_lock);
- /*
- * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
- * does not hold the mmu_lock. On failure, i.e. if a different logical
- * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
- * the current value, so the caller operates on fresh data, e.g. if it
- * retries tdp_mmu_set_spte_atomic()
- */
- if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
- return -EBUSY;
+ ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
+ if (ret)
+ return ret;
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
new_spte, iter->level, true);
@@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
{
int ret;
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
/*
- * Freeze the SPTE by setting it to a special,
- * non-present value. This will stop other threads from
- * immediately installing a present entry in its place
- * before the TLBs are flushed.
+ * Freeze the SPTE by setting it to a special, non-present value. This
+ * will stop other threads from immediately installing a present entry
+ * in its place before the TLBs are flushed.
+ *
+ * Delay processing of the zapped SPTE until after TLBs are flushed and
+ * the REMOVED_SPTE is replaced (see below).
*/
- ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
+ ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
if (ret)
return ret;
@@ -599,11 +614,19 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
/*
* No other thread can overwrite the removed SPTE as they must either
* wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
- * overwrite the special removed SPTE value. No bookkeeping is needed
- * here since the SPTE is going from non-present to non-present. Use
- * the raw write helper to avoid an unnecessary check on volatile bits.
+ * overwrite the special removed SPTE value. Use the raw write helper to
+ * avoid an unnecessary check on volatile bits.
*/
- __kvm_tdp_mmu_write_spte(iter->sptep, 0);
+ __kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
+
+ /*
+ * Process the zapped SPTE after flushing TLBs, and after replacing
+ * REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are
+ * blocked by the REMOVED_SPTE and reduces contention on the child
+ * SPTEs.
+ */
+ handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+ SHADOW_NONPRESENT_VALUE, iter->level, true);
return 0;
}
@@ -740,8 +763,8 @@ retry:
continue;
if (!shared)
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
- else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
+ else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE))
goto retry;
}
}
@@ -808,8 +831,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;
- tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
- sp->gfn, sp->role.level + 1);
+ tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
+ SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
return true;
}
@@ -843,7 +866,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
!is_last_spte(iter.old_spte, iter.level))
continue;
- tdp_mmu_iter_set_spte(kvm, &iter, 0);
+ tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
/*
* Zappings SPTEs in invalid roots doesn't require a TLB flush,
@@ -1028,7 +1051,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
}
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
- if (unlikely(is_mmio_spte(new_spte))) {
+ if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
vcpu->stat.pf_mmio_spte_created++;
trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
new_spte);
@@ -1258,52 +1281,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
}
-static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
- struct kvm_gfn_range *range)
-{
- u64 new_spte;
-
- /* Huge pages aren't expected to be modified without first being zapped. */
- WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
-
- if (iter->level != PG_LEVEL_4K ||
- !is_shadow_present_pte(iter->old_spte))
- return false;
-
- /*
- * Note, when changing a read-only SPTE, it's not strictly necessary to
- * zero the SPTE before setting the new PFN, but doing so preserves the
- * invariant that the PFN of a present * leaf SPTE can never change.
- * See handle_changed_spte().
- */
- tdp_mmu_iter_set_spte(kvm, iter, 0);
-
- if (!pte_write(range->arg.pte)) {
- new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
- pte_pfn(range->arg.pte));
-
- tdp_mmu_iter_set_spte(kvm, iter, new_spte);
- }
-
- return true;
-}
-
-/*
- * Handle the changed_pte MMU notifier for the TDP MMU.
- * data is a pointer to the new pte_t mapping the HVA specified by the MMU
- * notifier.
- * Returns non-zero if a flush is needed before releasing the MMU lock.
- */
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /*
- * No need to handle the remote TLB flush under RCU protection, the
- * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
- * shadow page. See the WARN on pfn_changed in handle_changed_spte().
- */
- return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
-}
-
/*
* Remove write access from all SPTEs at or above min_level that map GFNs
* [start, end). Returns true if an SPTE has been changed and the TLBs need to