diff options
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r-- | arch/riscv/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/riscv/kvm/aia.c | 5 | ||||
-rw-r--r-- | arch/riscv/kvm/main.c | 18 | ||||
-rw-r--r-- | arch/riscv/kvm/mmu.c | 20 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu.c | 85 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_exit.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_onereg.c | 6 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_pmu.c | 260 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi.c | 7 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_hsm.c | 42 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_pmu.c | 17 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_sta.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/vm.c | 1 |
13 files changed, 379 insertions, 92 deletions
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index c9646521f1..c2cacfbc06 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -3,7 +3,7 @@ # Makefile for RISC-V KVM support # -ccflags-y += -I $(srctree)/$(src) +ccflags-y += -I $(src) include $(srctree)/virt/kvm/Makefile.kvm diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index a944294f6f..0f0a9d11bb 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void) enable_percpu_irq(hgei_parent_irq, irq_get_trigger_type(hgei_parent_irq)); csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) @@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void) return; hgctrl = get_cpu_ptr(&aia_hgei); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); /* Disable per-CPU SGEI interrupt */ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c..bab2ec34cd 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_hardware_enable(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a9e2fd7245..b63650f9b9 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - int ret; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.pgd) - return false; - - WARN_ON(range->end - range->start != 1); - - ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, - __pfn_to_phys(pfn), PAGE_SIZE, true, true); - if (ret) { - kvm_debug("Failed to map G-stage page (error %d)\n", ret); - return true; - } - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { pte_t *ptep; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index b5ca9f2e98..17e21df36c 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr)); + spin_lock(&vcpu->arch.reset_cntx_lock); memcpy(cntx, reset_cntx, sizeof(*cntx)); + spin_unlock(&vcpu->arch.reset_cntx_lock); kvm_riscv_vcpu_fp_reset(vcpu); @@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.hfence_lock); /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + spin_lock_init(&vcpu->arch.reset_cntx_lock); + + spin_lock(&vcpu->arch.reset_cntx_lock); cntx = &vcpu->arch.guest_reset_context; cntx->sstatus = SR_SPP | SR_SPIE; cntx->hstatus = 0; cntx->hstatus |= HSTATUS_VTW; cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + spin_unlock(&vcpu->arch.reset_cntx_lock); if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) return -ENOMEM; @@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && - !vcpu->arch.power_off && !vcpu->arch.pause); + !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) } } + /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { + if (!(hvip & (1UL << IRQ_PMU_OVF)) && + !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) + clear_bit(IRQ_PMU_OVF, v->irqs_pending); + } + /* Sync-up AIA high interrupts */ kvm_riscv_vcpu_aia_sync_interrupts(vcpu); @@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); @@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } -void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } -void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); } +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_on(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_riscv_vcpu_power_off(vcpu); + __kvm_riscv_vcpu_power_off(vcpu); break; default: ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. */ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); csr_write(CSR_HENVCFG, cfg->henvcfg); @@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) && (!vcpu->arch.pause), + (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); kvm_vcpu_srcu_read_lock(vcpu); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* * Awaken to handle a signal, request to * sleep again later. diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 2415722c01..5761f95abb 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index e5706f5f2c..62874fbca2 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = { /* Multi letter extensions (alphabetically sorted) */ KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + /* There is not architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index cee1b9ca4e..bcf41d6e0d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_pmu.h> +#include <asm/sbi.h> #include <linux/bitops.h> #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) @@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); } +static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + unsigned long *out_val) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + int fevent_code; + + if (!IS_ENABLED(CONFIG_32BIT)) { + pr_warn("%s: should be invoked for only RV32\n", __func__); + return -EINVAL; + } + + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld]during read\n", cidx); + return -EINVAL; + } + + pmc = &kvpmu->pmc[cidx]; + + if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) + return -EINVAL; + + fevent_code = get_event_code(pmc->event_idx); + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + + *out_val = pmc->counter_val >> 32; + + return 0; +} + static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, unsigned long *out_val) { @@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, u64 enabled, running; int fevent_code; + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld] during read\n", cidx); + return -EINVAL; + } + pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } -static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, - unsigned long flags, unsigned long eidx, unsigned long evtdata) +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, + unsigned long flags, unsigned long eidx, + unsigned long evtdata) { struct perf_event *event; @@ -247,9 +325,9 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } @@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, return ret; } +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + + if (kvpmu->sdata) { + if (kvpmu->snapshot_addr != INVALID_GPA) { + memset(kvpmu->sdata, 0, snapshot_area_size); + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, + kvpmu->sdata, snapshot_area_size); + } else { + pr_warn("snapshot address invalid\n"); + } + kfree(kvpmu->sdata); + kvpmu->sdata = NULL; + } + kvpmu->snapshot_addr = INVALID_GPA; +} + +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + int sbiret = 0; + gpa_t saddr; + unsigned long hva; + bool writable; + + if (!kvpmu || flags) { + sbiret = SBI_ERR_INVALID_PARAM; + goto out; + } + + if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { + kvm_pmu_clear_snapshot_area(vcpu); + return 0; + } + + saddr = saddr_low; + + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + saddr |= ((gpa_t)saddr_high << 32); + else + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) { + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); + if (!kvpmu->sdata) + return -ENOMEM; + + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { + kfree(kvpmu->sdata); + sbiret = SBI_ERR_FAILURE; + goto out; + } + + kvpmu->snapshot_addr = saddr; + +out: + retdata->err_val = sbiret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, int i, pmc_index, sbiret = 0; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set) { + if (kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data))) { + pr_warn("Unable to read snapshot shared memory while starting counters\n"); + sbiret = SBI_ERR_FAILURE; + goto out; + } + } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; + /* The guest started the counter again. Reset the overflow status */ + clear_bit(pmc_index, kvpmu->pmc_overflown); pmc = &kvpmu->pmc[pmc_index]; - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { pmc->counter_val = ival; + } else if (snap_flag_set) { + /* The counter index in the snapshot are relative to the counter base */ + pmc->counter_val = kvpmu->sdata->ctr_values[i]; + } + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { fevent_code = get_event_code(pmc->event_idx); if (fevent_code >= SBI_PMU_FW_MAX) { @@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, u64 enabled, running; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + bool shmem_needs_update = false; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; @@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, sbiret = SBI_ERR_ALREADY_STOPPED; } - if (flags & SBI_PMU_STOP_FLAG_RESET) { - /* Relase the counter if this is a reset request */ - pmc->counter_val += perf_event_read_value(pmc->perf_event, - &enabled, &running); + if (flags & SBI_PMU_STOP_FLAG_RESET) + /* Release the counter if this is a reset request */ kvm_pmu_release_perf_event(pmc); - } } else { sbiret = SBI_ERR_INVALID_PARAM; } + + if (snap_flag_set && !sbiret) { + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + else if (pmc->perf_event) + pmc->counter_val += perf_event_read_value(pmc->perf_event, + &enabled, &running); + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= BIT(i); + kvpmu->sdata->ctr_values[i] = pmc->counter_val; + shmem_needs_update = true; + } + if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); + if (snap_flag_set) { + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); + shmem_needs_update = true; + } } } + if (shmem_needs_update) + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data)); + out: retdata->err_val = sbiret; @@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata) { - int ctr_idx, ret, sbiret = 0; + int ctr_idx, sbiret = 0; + long ret; bool is_fevent; unsigned long event_code; u32 etype = kvm_pmu_get_perf_event_type(eidx); @@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba kvpmu->fw_event[event_code].started = true; } else { ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); - if (ret) - return ret; + if (ret) { + sbiret = SBI_ERR_NOT_SUPPORTED; + goto out; + } } set_bit(ctr_idx, kvpmu->pmc_in_use); @@ -530,7 +740,19 @@ out: return 0; } -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + + ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); + if (ret == -EINVAL) + retdata->err_val = SBI_ERR_INVALID_PARAM; + + return 0; +} + +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata) { int ret; @@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) kvpmu->num_hw_ctrs = num_hw_ctrs + 1; kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvpmu->snapshot_addr = INVALID_GPA; if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); @@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc->cinfo.csr = CSR_CYCLE + i; } else { pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; - pmc->cinfo.width = BITS_PER_LONG - 1; + pmc->cinfo.width = 63; } } @@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvm_pmu_clear_snapshot_area(vcpu); } void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 72a2ffb8dc..62f409d417 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, unsigned long i; struct kvm_vcpu *tmp; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&vcpu->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&vcpu->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&run->system_event, 0, sizeof(run->system_event)); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 7dca0e9381..dce667f4b6 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; + int ret = 0; target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_ERR_ALREADY_AVAILABLE; + spin_lock(&target_vcpu->arch.mp_state_lock); + + if (!kvm_riscv_vcpu_stopped(target_vcpu)) { + ret = SBI_ERR_ALREADY_AVAILABLE; + goto out; + } + + spin_lock(&target_vcpu->arch.reset_cntx_lock); reset_cntx = &target_vcpu->arch.guest_reset_context; /* start address */ reset_cntx->sepc = cp->a1; @@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) reset_cntx->a0 = target_vcpuid; /* private data passed from kernel */ reset_cntx->a1 = cp->a2; + spin_unlock(&target_vcpu->arch.reset_cntx_lock); + kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); - kvm_riscv_vcpu_power_on(target_vcpu); + __kvm_riscv_vcpu_power_on(target_vcpu); - return 0; +out: + spin_unlock(&target_vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) { - if (vcpu->arch.power_off) - return SBI_ERR_FAILURE; + int ret = 0; - kvm_riscv_vcpu_power_off(vcpu); + spin_lock(&vcpu->arch.mp_state_lock); - return 0; + if (kvm_riscv_vcpu_stopped(vcpu)) { + ret = SBI_ERR_FAILURE; + goto out; + } + + __kvm_riscv_vcpu_power_off(vcpu); + +out: + spin_unlock(&vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) @@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) + if (!kvm_riscv_vcpu_stopped(target_vcpu)) return SBI_HSM_STATE_STARTED; else if (vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; @@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, { int ret = 0; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - struct kvm *kvm = vcpu->kvm; unsigned long funcid = cp->a6; switch (funcid) { case SBI_EXT_HSM_HART_START: - mutex_lock(&kvm->lock); ret = kvm_sbi_hsm_vcpu_start(vcpu); - mutex_unlock(&kvm->lock); break; case SBI_EXT_HSM_HART_STOP: ret = kvm_sbi_hsm_vcpu_stop(vcpu); diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index 7eca72df2c..e4be34e03e 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, #endif /* * This can fail if perf core framework fails to create an event. - * Forward the error to userspace because it's an error which - * happened within the host kernel. The other option would be - * to convert to an SBI error and forward to the guest. + * No need to forward the error to userspace and exit the guest. + * The operation can continue without profiling. Forward the + * appropriate SBI error to the guest. */ ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp, retdata); @@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; case SBI_EXT_PMU_COUNTER_FW_READ: - ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); + ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata); + break; + case SBI_EXT_PMU_COUNTER_FW_READ_HI: + if (IS_ENABLED(CONFIG_32BIT)) + ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata); + else + retdata->out_val = 0; + break; + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: + ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index d8cf9ca28c..5f35427114 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) if (flags != 0) return SBI_ERR_INVALID_PARAM; - if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && - shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + if (shmem_phys_lo == SBI_SHMEM_DISABLE && + shmem_phys_hi == SBI_SHMEM_DISABLE) { vcpu->arch.sta.shmem = INVALID_GPA; return 0; } diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index ce58bc48e5..7396b8654f 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: |