summary refs log tree commit diff stats
path: root/arch/x86/kvm/pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/pmu.c')
-rw-r--r-- arch/x86/kvm/pmu.c 143
1 files changed, 85 insertions, 58 deletions
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2ab2d5213f..a593b03c9a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -29,6 +29,9 @@
struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
/* Precise Distribution of Instructions Retired (PDIR) */
static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
@@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
* all perf counters (both gp and fixed). The mapping relationship
* between pmc and perf counters is as the following:
* * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
- * [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
+ * [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
* * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
* and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
*/
@@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
int idx)
{
- int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+ int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
if (filter->action == KVM_PMU_EVENT_DENY &&
test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
@@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
{
return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
- static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
check_pmu_event_filter(pmc);
}
-static void reprogram_counter(struct kvm_pmc *pmc)
+static int reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
u64 eventsel = pmc->eventsel;
@@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
emulate_overflow = pmc_pause_counter(pmc);
if (!pmc_event_is_allowed(pmc))
- goto reprogram_complete;
+ return 0;
if (emulate_overflow)
__kvm_perf_overflow(pmc, false);
@@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
if (pmc_is_fixed(pmc)) {
fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED);
+ pmc->idx - KVM_FIXED_PMC_BASE_IDX);
if (fixed_ctr_ctrl & 0x1)
eventsel |= ARCH_PERFMON_EVENTSEL_OS;
if (fixed_ctr_ctrl & 0x2)
@@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
}
if (pmc->current_config == new_config && pmc_resume_counter(pmc))
- goto reprogram_complete;
+ return 0;
pmc_release_perf_event(pmc);
pmc->current_config = new_config;
- /*
- * If reprogramming fails, e.g. due to contention, leave the counter's
- * regprogram bit set, i.e. opportunistically try again on the next PMU
- * refresh. Don't make a new request as doing so can stall the guest
- * if reprogramming repeatedly fails.
- */
- if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
- (eventsel & pmu->raw_event_mask),
- !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
- !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT))
- return;
-
-reprogram_complete:
- clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+ return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+ (eventsel & pmu->raw_event_mask),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+ eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
+ DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
int bit;
- for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
- struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+ bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
- if (unlikely(!pmc)) {
- clear_bit(bit, pmu->reprogram_pmi);
- continue;
- }
+ /*
+ * The reprogramming bitmap can be written asynchronously by something
+ * other than the task that holds vcpu->mutex, take care to clear only
+ * the bits that will actually be processed.
+ */
+ BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
+ atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
- reprogram_counter(pmc);
+ kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
+ /*
+ * If reprogramming fails, e.g. due to contention, re-set the
+ * reprogram bit, i.e. opportunistically try again on the
+ * next PMU refresh. Don't make a new request as doing so can
+ * stall the guest if reprogramming repeatedly fails.
+ */
+ if (reprogram_counter(pmc))
+ set_bit(pmc->idx, pmu->reprogram_pmi);
}
/*
@@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
kvm_pmu_cleanup(vcpu);
}
-/* check if idx is a valid index to access PMU */
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
{
- return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
+ /*
+ * On Intel, VMX interception has priority over RDPMC exceptions that
+ * aren't already handled by the emulator, i.e. there are no additional
+ * checks needed for Intel PMUs.
+ *
+ * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
+ * i.e. an invalid PMC results in a #GP, not #VMEXIT.
+ */
+ if (!kvm_pmu_ops.check_rdpmc_early)
+ return 0;
+
+ return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
}
bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
- bool fast_mode = idx & (1u << 31);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- u64 mask = fast_mode ? ~0u : ~0ull;
+ u64 mask = ~0ull;
if (!pmu->version)
return 1;
@@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
- for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
- pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
- if (!pmc)
- continue;
-
+ kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
pmc_stop_counter(pmc);
pmc->counter = 0;
pmc->emulated_counter = 0;
@@ -804,10 +813,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
pmu->pmc_in_use, X86_PMC_IDX_MAX);
- for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
- pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-
- if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+ kvm_for_each_pmc(pmu, pmc, i, bitmask) {
+ if (pmc->perf_event && !pmc_speculative_in_use(pmc))
pmc_stop_counter(pmc);
}
@@ -827,13 +834,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
kvm_pmu_request_counter_reprogram(pmc);
}
-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
- unsigned int perf_hw_id)
-{
- return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
- AMD64_RAW_EVENT_MASK_NB);
-}
-
static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
bool select_os, select_user;
@@ -845,29 +845,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
select_user = config & ARCH_PERFMON_EVENTSEL_USR;
} else {
config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED);
+ pmc->idx - KVM_FIXED_PMC_BASE_IDX);
select_os = config & 0x1;
select_user = config & 0x2;
}
+ /*
+ * Skip the CPL lookup, which isn't free on Intel, if the result will
+ * be the same regardless of the CPL.
+ */
+ if (select_os == select_user)
+ return select_os;
+
return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
{
+ DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
int i;
- for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
- pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+ BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
- if (!pmc || !pmc_event_is_allowed(pmc))
+ if (!kvm_pmu_has_perf_global_ctrl(pmu))
+ bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+ else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+ (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
+ return;
+
+ kvm_for_each_pmc(pmu, pmc, i, bitmap) {
+ /*
+ * Ignore checks for edge detect (all events currently emulated
+ * by KVM are always rising edges), pin control (unsupported
+ * by modern CPUs), and counter mask and its invert flag (KVM
+ * doesn't emulate multiple events in a single clock cycle).
+ *
+ * Note, the uppermost nibble of AMD's mask overlaps Intel's
+ * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
+ * bits (bits 35:34). Checking the "in HLE/RTM transaction"
+ * flags is correct as the vCPU can't be in a transaction if
+ * KVM is emulating an instruction. Checking the reserved bits
+ * might be wrong if they are defined in the future, but so
+ * could ignoring them, so do the simple thing for now.
+ */
+ if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
+ !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
continue;
- /* Ignore checks for edge detect, pin control, invert and CMASK bits */
- if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
- kvm_pmu_incr_counter(pmc);
+ kvm_pmu_incr_counter(pmc);
}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);