diff options
Diffstat (limited to 'arch/arm64/kvm/vgic')
-rw-r--r-- | arch/arm64/kvm/vgic/trace.h | 38 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-debug.c | 280 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-init.c | 626 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-irqfd.c | 155 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-its.c | 2908 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-kvm-device.c | 672 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v2.c | 561 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v3.c | 1186 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.c | 1118 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.h | 230 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v2.c | 480 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3.c | 760 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v4.c | 521 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.c | 1078 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.h | 346 |
15 files changed, 10959 insertions, 0 deletions
diff --git a/arch/arm64/kvm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h new file mode 100644 index 0000000000..83c64401a7 --- /dev/null +++ b/arch/arm64/kvm/vgic/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VGIC_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_VGIC_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c new file mode 100644 index 0000000000..07aa043712 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Linaro + * Author: Christoffer Dall <christoffer.dall@linaro.org> + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/seq_file.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Structure to control looping through the entire vgic state. We start at + * zero for each field and move upwards. So, if dist_id is 0 we print the + * distributor info. When dist_id is 1, we have already printed it and move + * on. + * + * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and + * so on. + */ +struct vgic_state_iter { + int nr_cpus; + int nr_spis; + int nr_lpis; + int dist_id; + int vcpu_id; + int intid; + int lpi_idx; + u32 *lpi_array; +}; + +static void iter_next(struct vgic_state_iter *iter) +{ + if (iter->dist_id == 0) { + iter->dist_id++; + return; + } + + iter->intid++; + if (iter->intid == VGIC_NR_PRIVATE_IRQS && + ++iter->vcpu_id < iter->nr_cpus) + iter->intid = 0; + + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (iter->lpi_idx < iter->nr_lpis) + iter->intid = iter->lpi_array[iter->lpi_idx]; + iter->lpi_idx++; + } +} + +static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, + loff_t pos) +{ + int nr_cpus = atomic_read(&kvm->online_vcpus); + + memset(iter, 0, sizeof(*iter)); + + iter->nr_cpus = nr_cpus; + iter->nr_spis = kvm->arch.vgic.nr_spis; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array); + if (iter->nr_lpis < 0) + iter->nr_lpis = 0; + } + + /* Fast forward to the right position if needed */ + while (pos--) + iter_next(iter); +} + +static bool end_of_vgic(struct vgic_state_iter *iter) +{ + return iter->dist_id > 0 && + iter->vcpu_id == iter->nr_cpus && + iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && + iter->lpi_idx > iter->nr_lpis; +} + +static void *vgic_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = s->private; + struct vgic_state_iter *iter; + + mutex_lock(&kvm->arch.config_lock); + iter = kvm->arch.vgic.iter; + if (iter) { + iter = ERR_PTR(-EBUSY); + goto out; + } + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + iter = ERR_PTR(-ENOMEM); + goto out; + } + + iter_init(kvm, iter, *pos); + kvm->arch.vgic.iter = iter; + + if (end_of_vgic(iter)) + iter = NULL; +out: + mutex_unlock(&kvm->arch.config_lock); + return iter; +} + +static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = s->private; + struct vgic_state_iter *iter = kvm->arch.vgic.iter; + + ++*pos; + iter_next(iter); + if (end_of_vgic(iter)) + iter = NULL; + return iter; +} + +static void vgic_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + struct vgic_state_iter *iter; + + /* + * If the seq file wasn't properly opened, there's nothing to clearn + * up. + */ + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->arch.config_lock); + iter = kvm->arch.vgic.iter; + kfree(iter->lpi_array); + kfree(iter); + kvm->arch.vgic.iter = NULL; + mutex_unlock(&kvm->arch.config_lock); +} + +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +{ + bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + + seq_printf(s, "Distributor\n"); + seq_printf(s, "===========\n"); + seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); + seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + if (v3) + seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); + seq_printf(s, "enabled:\t%d\n", dist->enabled); + seq_printf(s, "\n"); + + seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); + seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); + seq_printf(s, "G=group\n"); +} + +static void print_header(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + int id = 0; + char *hdr = "SPI "; + + if (vcpu) { + hdr = "VCPU"; + id = vcpu->vcpu_id; + } + + seq_printf(s, "\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "----------------------------------------------------------------\n"); +} + +static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + char *type; + bool pending; + + if (irq->intid < VGIC_NR_SGIS) + type = "SGI"; + else if (irq->intid < VGIC_NR_PRIVATE_IRQS) + type = "PPI"; + else if (irq->intid < VGIC_MAX_SPI) + type = "SPI"; + else + type = "LPI"; + + if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) + print_header(s, irq, vcpu); + + pending = irq->pending_latch; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &pending); + WARN_ON_ONCE(err); + } + + seq_printf(s, " %s %4d " + " %2d " + "%d%d%d%d%d%d%d " + "%8d " + "%8x " + " %2x " + "%3d " + " %2d " + "\n", + type, irq->intid, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + pending, + irq->line_level, + irq->active, + irq->enabled, + irq->hw, + irq->config == VGIC_CONFIG_LEVEL, + irq->group, + irq->hwintid, + irq->mpidr, + irq->source, + irq->priority, + (irq->vcpu) ? irq->vcpu->vcpu_id : -1); +} + +static int vgic_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = s->private; + struct vgic_state_iter *iter = v; + struct vgic_irq *irq; + struct kvm_vcpu *vcpu = NULL; + unsigned long flags; + + if (iter->dist_id == 0) { + print_dist_state(s, &kvm->arch.vgic); + return 0; + } + + if (!kvm->arch.vgic.initialized) + return 0; + + if (iter->vcpu_id < iter->nr_cpus) + vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + + irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (!irq) { + seq_printf(s, " LPI %4d freed\n", iter->intid); + return 0; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + print_irq_state(s, irq, vcpu); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(kvm, irq); + return 0; +} + +static const struct seq_operations vgic_debug_sops = { + .start = vgic_debug_start, + .next = vgic_debug_next, + .stop = vgic_debug_stop, + .show = vgic_debug_show +}; + +DEFINE_SEQ_ATTRIBUTE(vgic_debug); + +void vgic_debug_init(struct kvm *kvm) +{ + debugfs_create_file("vgic-state", 0444, kvm->debugfs_dentry, kvm, + &vgic_debug_fops); +} + +void vgic_debug_destroy(struct kvm *kvm) +{ +} diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c new file mode 100644 index 0000000000..e949e1d0fd --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Initialization rules: there are multiple stages to the vgic + * initialization, both for the distributor and the CPU interfaces. The basic + * idea is that even though the VGIC is not functional or not requested from + * user space, the critical path of the run loop can still call VGIC functions + * that just won't do anything, without them having to check additional + * initialization flags to ensure they don't look at uninitialized data + * structures. + * + * Distributor: + * + * - kvm_vgic_early_init(): initialization of static data that doesn't + * depend on any sizing information or emulation type. No allocation + * is allowed there. + * + * - vgic_init(): allocation and initialization of the generic data + * structures that depend on sizing information (number of CPUs, + * number of interrupts). Also initializes the vcpu specific data + * structures. Can be executed lazily for GICv2. + * + * CPU Interface: + * + * - kvm_vgic_vcpu_init(): initialization of static data that + * doesn't depend on any sizing information or emulation type. No + * allocation is allowed there. + */ + +/* EARLY INIT */ + +/** + * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures + * @kvm: The VM whose VGIC districutor should be initialized + * + * Only do initialization of static structures that don't require any + * allocation or sizing information from userspace. vgic_init() called + * kvm_vgic_dist_init() which takes care of the rest. + */ +void kvm_vgic_early_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + INIT_LIST_HEAD(&dist->lpi_list_head); + INIT_LIST_HEAD(&dist->lpi_translation_cache); + raw_spin_lock_init(&dist->lpi_list_lock); +} + +/* CREATION */ + +/** + * kvm_vgic_create: triggered by the instantiation of the VGIC device by + * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) + * or through the generic KVM_CREATE_DEVICE API ioctl. + * irqchip_in_kernel() tells you if this function succeeded or not. + * @kvm: kvm struct pointer + * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] + */ +int kvm_vgic_create(struct kvm *kvm, u32 type) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + int ret; + + /* + * This function is also called by the KVM_CREATE_IRQCHIP handler, + * which had no chance yet to check the availability of the GICv2 + * emulation. So check this here again. KVM_CREATE_DEVICE does + * the proper checks already. + */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && + !kvm_vgic_global_state.can_emulate_gicv2) + return -ENODEV; + + /* Must be held to avoid race with vCPU creation */ + lockdep_assert_held(&kvm->lock); + + ret = -EBUSY; + if (!lock_all_vcpus(kvm)) + return ret; + + mutex_lock(&kvm->arch.config_lock); + + if (irqchip_in_kernel(kvm)) { + ret = -EEXIST; + goto out_unlock; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu_has_run_once(vcpu)) + goto out_unlock; + } + ret = 0; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->max_vcpus = VGIC_V2_MAX_CPUS; + else + kvm->max_vcpus = VGIC_V3_MAX_CPUS; + + if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) { + ret = -E2BIG; + goto out_unlock; + } + + kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; + + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + else + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + unlock_all_vcpus(kvm); + return ret; +} + +/* INIT/DESTROY */ + +/** + * kvm_vgic_dist_init: initialize the dist data structures + * @kvm: kvm struct pointer + * @nr_spis: number of spis, frozen by caller + */ +static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); + int i; + + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); + if (!dist->spis) + return -ENOMEM; + + /* + * In the following code we do not take the irq struct lock since + * no other action on irq structs can happen while the VGIC is + * not initialized yet: + * If someone wants to inject an interrupt or does a MMIO access, we + * require prior initialization in case of a virtual GICv3 or trigger + * initialization when using a virtual GICv2. + */ + for (i = 0; i < nr_spis; i++) { + struct vgic_irq *irq = &dist->spis[i]; + + irq->intid = i + VGIC_NR_PRIVATE_IRQS; + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->vcpu = NULL; + irq->target_vcpu = vcpu0; + kref_init(&irq->refcount); + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->targets = 0; + irq->group = 0; + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->mpidr = 0; + irq->group = 1; + break; + default: + kfree(dist->spis); + dist->spis = NULL; + return -EINVAL; + } + } + return 0; +} + +/** + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * + * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface + */ +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + int i; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + raw_spin_lock_init(&vgic_cpu->ap_list_lock); + atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + /* + * If we are creating a VCPU with a GICv3 we must also register the + * KVM io device for the redistributor that belongs to this VCPU. + */ + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->slots_lock); + ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->slots_lock); + } + return ret; +} + +static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_enable(vcpu); + else + vgic_v3_enable(vcpu); +} + +/* + * vgic_init: allocates and initializes dist and vcpu data structures + * depending on two dimensioning parameters: + * - the number of spis + * - the number of vcpus + * The function is generally called when nr_spis has been explicitly set + * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. + * vgic_initialized() returns true when this function has succeeded. + */ +int vgic_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0, i; + unsigned long idx; + + lockdep_assert_held(&kvm->arch.config_lock); + + if (vgic_initialized(kvm)) + return 0; + + /* Are we also in the middle of creating a VCPU? */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + return -EBUSY; + + /* freeze the number of spis */ + if (!dist->nr_spis) + dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; + + ret = kvm_vgic_dist_init(kvm, dist->nr_spis); + if (ret) + goto out; + + /* Initialize groups on CPUs created before the VGIC type was known */ + kvm_for_each_vcpu(idx, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->group = 1; + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } + } + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_init(kvm); + + /* + * If we have GICv4.1 enabled, unconditionnaly request enable the + * v4 support so that we get HW-accelerated vSGIs. Otherwise, only + * enable it if we present a virtual ITS to the guest. + */ + if (vgic_supports_direct_msis(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } + + kvm_for_each_vcpu(idx, vcpu, kvm) + kvm_vgic_vcpu_enable(vcpu); + + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + + vgic_debug_init(kvm); + + /* + * If userspace didn't set the GIC implementation revision, + * default to the latest and greatest. You know want it. + */ + if (!dist->implementation_rev) + dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST; + dist->initialized = true; + +out: + return ret; +} + +static void kvm_vgic_dist_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_redist_region *rdreg, *next; + + dist->ready = false; + dist->initialized = false; + + kfree(dist->spis); + dist->spis = NULL; + dist->nr_spis = 0; + dist->vgic_dist_base = VGIC_ADDR_UNDEF; + + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) + vgic_v3_free_redist_region(rdreg); + INIT_LIST_HEAD(&dist->rd_regions); + } else { + dist->vgic_cpu_base = VGIC_ADDR_UNDEF; + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_destroy(kvm); + + if (vgic_supports_direct_msis(kvm)) + vgic_v4_teardown(kvm); +} + +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* + * Retire all pending LPIs on this vcpu anyway as we're + * going to destroy it. + */ + vgic_flush_pending_lpis(vcpu); + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } +} + +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + +void kvm_vgic_destroy(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long i; + + mutex_lock(&kvm->slots_lock); + + vgic_debug_destroy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + __kvm_vgic_vcpu_destroy(vcpu); + + mutex_lock(&kvm->arch.config_lock); + + kvm_vgic_dist_destroy(kvm); + + mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); +} + +/** + * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest + * is a GICv2. A GICv3 must be explicitly initialized by userspace using the + * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. + * @kvm: kvm struct pointer + */ +int vgic_lazy_init(struct kvm *kvm) +{ + int ret = 0; + + if (unlikely(!vgic_initialized(kvm))) { + /* + * We only provide the automatic initialization of the VGIC + * for the legacy case of a GICv2. Any other type must + * be explicitly initialized once setup with the respective + * KVM device call. + */ + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) + return -EBUSY; + + mutex_lock(&kvm->arch.config_lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->arch.config_lock); + } + + return ret; +} + +/* RESOURCE MAPPING */ + +/** + * Map the MMIO regions depending on the VGIC model exposed to the guest + * called on the first VCPU run. + * Also map the virtual CPU interface into the VM. + * v2 calls vgic_init() if not already done. + * v3 and derivatives return an error if the VGIC is not initialized. + * vgic_ready() returns true if this function has succeeded. + * @kvm: kvm struct pointer + */ +int kvm_vgic_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + enum vgic_type type; + gpa_t dist_base; + int ret = 0; + + if (likely(vgic_ready(kvm))) + return 0; + + mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); + if (vgic_ready(kvm)) + goto out; + + if (!irqchip_in_kernel(kvm)) + goto out; + + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + ret = vgic_v2_map_resources(kvm); + type = VGIC_V2; + } else { + ret = vgic_v3_map_resources(kvm); + type = VGIC_V3; + } + + if (ret) + goto out; + + dist->ready = true; + dist_base = dist->vgic_dist_base; + mutex_unlock(&kvm->arch.config_lock); + + ret = vgic_register_dist_iodev(kvm, dist_base, type); + if (ret) + kvm_err("Unable to register VGIC dist MMIO regions\n"); + + goto out_slots; +out: + mutex_unlock(&kvm->arch.config_lock); +out_slots: + mutex_unlock(&kvm->slots_lock); + + if (ret) + kvm_vgic_destroy(kvm); + + return ret; +} + +/* GENERIC PROBE */ + +void kvm_vgic_cpu_up(void) +{ + enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); +} + + +void kvm_vgic_cpu_down(void) +{ + disable_percpu_irq(kvm_vgic_global_state.maint_irq); +} + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + /* + * We cannot rely on the vgic maintenance interrupt to be + * delivered synchronously. This means we can only use it to + * exit the VM, and we perform the handling of EOIed + * interrupts on the exit path (see vgic_fold_lr_state). + */ + return IRQ_HANDLED; +} + +static struct gic_kvm_info *gic_kvm_info; + +void __init vgic_set_kvm_info(const struct gic_kvm_info *info) +{ + BUG_ON(gic_kvm_info != NULL); + gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + if (gic_kvm_info) + *gic_kvm_info = *info; +} + +/** + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware + * + * For a specific CPU, initialize the GIC VE hardware. + */ +void kvm_vgic_init_cpu_hardware(void) +{ + BUG_ON(preemptible()); + + /* + * We want to make sure the list registers start out clear so that we + * only have the program the used registers. + */ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_init_lrs(); + else + kvm_call_hyp(__vgic_v3_init_lrs); +} + +/** + * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable + * according to the host GIC model. Accordingly calls either + * vgic_v2/v3_probe which registers the KVM_DEVICE that can be + * instantiated by a guest later on . + */ +int kvm_vgic_hyp_init(void) +{ + bool has_mask; + int ret; + + if (!gic_kvm_info) + return -ENODEV; + + has_mask = !gic_kvm_info->no_maint_irq_mask; + + if (has_mask && !gic_kvm_info->maint_irq) { + kvm_err("No vgic maintenance irq\n"); + return -ENXIO; + } + + /* + * If we get one of these oddball non-GICs, taint the kernel, + * as we have no idea of how they *really* behave. + */ + if (gic_kvm_info->no_hw_deactivation) { + kvm_info("Non-architectural vgic, tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + kvm_vgic_global_state.no_hw_deactivation = true; + } + + switch (gic_kvm_info->type) { + case GIC_V2: + ret = vgic_v2_probe(gic_kvm_info); + break; + case GIC_V3: + ret = vgic_v3_probe(gic_kvm_info); + if (!ret) { + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GIC system register CPU interface enabled\n"); + } + break; + default: + ret = -ENODEV; + } + + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + + kfree(gic_kvm_info); + gic_kvm_info = NULL; + + if (ret) + return ret; + + if (!has_mask && !kvm_vgic_global_state.maint_irq) + return 0; + + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, + vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", + kvm_vgic_global_state.maint_irq); + return ret; + } + + kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c new file mode 100644 index 0000000000..475059bace --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> +#include <kvm/arm_vgic.h> +#include "vgic.h" + +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); +} + +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm_msi *msi) +{ + msi->address_lo = e->msi.address_lo; + msi->address_hi = e->msi.address_hi; + msi->data = e->msi.data; + msi->flags = e->msi.flags; + msi->devid = e->msi.devid; +} +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + struct kvm_msi msi; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + if (!level) + return -1; + + kvm_populate_msi(e, &msi); + return vgic_its_inject_msi(kvm, &msi); +} + +/** + * kvm_arch_set_irq_inatomic: fast-path for irqfd injection + */ +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -EWOULDBLOCK; + + switch (e->type) { + case KVM_IRQ_ROUTING_MSI: { + struct kvm_msi msi; + + if (!vgic_has_its(kvm)) + break; + + kvm_populate_msi(e, &msi); + return vgic_its_inject_cached_translation(kvm, &msi); + } + + case KVM_IRQ_ROUTING_IRQCHIP: + /* + * Injecting SPIs is always possible in atomic context + * as long as the damn vgic is initialized. + */ + if (unlikely(!vgic_initialized(kvm))) + break; + return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status); + } + + return -EWOULDBLOCK; +} + +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c new file mode 100644 index 0000000000..c420723548 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -0,0 +1,2908 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GICv3 ITS emulation + * + * Copyright (C) 2015,2016 ARM Ltd. + * Author: Andre Przywara <andre.przywara@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/uaccess.h> +#include <linux/list_sort.h> + +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +static int vgic_its_save_tables_v0(struct vgic_its *its); +static int vgic_its_restore_tables_v0(struct vgic_its *its); +static int vgic_its_commit_v0(struct vgic_its *its); +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv); + +/* + * Creates a new (reference to a) struct vgic_irq for a given LPI. + * If this LPI is already mapped on another ITS, we increase its refcount + * and return a pointer to the existing structure. + * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. + * This function returns a pointer to the _unlocked_ structure. + */ +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, + struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + unsigned long flags; + int ret; + + /* In this case there is no put, since we keep the reference. */ + if (irq) + return irq; + + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); + if (!irq) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&irq->lpi_list); + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + + irq->config = VGIC_CONFIG_EDGE; + kref_init(&irq->refcount); + irq->intid = intid; + irq->target_vcpu = vcpu; + irq->group = 1; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + /* + * There could be a race with another vgic_add_lpi(), so we need to + * check that we don't add a second list entry with the same LPI. + */ + list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { + if (oldirq->intid != intid) + continue; + + /* Someone was faster with adding this LPI, lets use that. */ + kfree(irq); + irq = oldirq; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() on the returned pointer once it's + * finished with the IRQ. + */ + vgic_get_irq_kref(irq); + + goto out_unlock; + } + + list_add_tail(&irq->lpi_list, &dist->lpi_list_head); + dist->lpi_list_count++; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + /* + * We "cache" the configuration table entries in our struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + * + * Should any of these fail, behave as if we couldn't create the LPI + * by dropping the refcount and returning the error. + */ + ret = update_lpi_config(kvm, irq, NULL, false); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + ret = vgic_v3_lpi_sync_pending_status(kvm, irq); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + return irq; +} + +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 num_eventid_bits; + gpa_t itt_addr; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_ite { + struct list_head ite_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 event_id; +}; + +struct vgic_translation_cache_entry { + struct list_head entry; + phys_addr_t db; + u32 devid; + u32 eventid; + struct vgic_irq *irq; +}; + +/** + * struct vgic_its_abi - ITS abi ops and settings + * @cte_esz: collection table entry size + * @dte_esz: device table entry size + * @ite_esz: interrupt translation table entry size + * @save tables: save the ITS tables into guest RAM + * @restore_tables: restore the ITS internal structs from tables + * stored in guest RAM + * @commit: initialize the registers which expose the ABI settings, + * especially the entry sizes + */ +struct vgic_its_abi { + int cte_esz; + int dte_esz; + int ite_esz; + int (*save_tables)(struct vgic_its *its); + int (*restore_tables)(struct vgic_its *its); + int (*commit)(struct vgic_its *its); +}; + +#define ABI_0_ESZ 8 +#define ESZ_MAX ABI_0_ESZ + +static const struct vgic_its_abi its_table_abi_versions[] = { + [0] = { + .cte_esz = ABI_0_ESZ, + .dte_esz = ABI_0_ESZ, + .ite_esz = ABI_0_ESZ, + .save_tables = vgic_its_save_tables_v0, + .restore_tables = vgic_its_restore_tables_v0, + .commit = vgic_its_commit_v0, + }, +}; + +#define NR_ITS_ABIS ARRAY_SIZE(its_table_abi_versions) + +inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its) +{ + return &its_table_abi_versions[its->abi_rev]; +} + +static int vgic_its_set_abi(struct vgic_its *its, u32 rev) +{ + const struct vgic_its_abi *abi; + + its->abi_rev = rev; + abi = vgic_its_get_abi(its); + return abi->commit(its); +} + +/* + * Find and returns a device in the device table for an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) +{ + struct its_device *device; + + list_for_each_entry(device, &its->device_list, dev_list) + if (device_id == device->device_id) + return device; + + return NULL; +} + +/* + * Find and returns an interrupt translation table entry (ITTE) for a given + * Device ID/Event ID pair on an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, + u32 event_id) +{ + struct its_device *device; + struct its_ite *ite; + + device = find_its_device(its, device_id); + if (device == NULL) + return NULL; + + list_for_each_entry(ite, &device->itt_head, ite_list) + if (ite->event_id == event_id) + return ite; + + return NULL; +} + +/* To be used as an iterator this macro misses the enclosing parentheses */ +#define for_each_lpi_its(dev, ite, its) \ + list_for_each_entry(dev, &(its)->device_list, dev_list) \ + list_for_each_entry(ite, &(dev)->itt_head, ite_list) + +#define GIC_LPI_OFFSET 8192 + +#define VITS_TYPER_IDBITS 16 +#define VITS_TYPER_DEVBITS 16 +#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1) +#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1) + +/* + * Finds and returns a collection in the ITS collection table. + * Must be called with the its_lock mutex held. + */ +static struct its_collection *find_collection(struct vgic_its *its, int coll_id) +{ + struct its_collection *collection; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + if (coll_id == collection->collection_id) + return collection; + } + + return NULL; +} + +#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) +#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) + +/* + * Reads the configuration data for a given LPI from guest memory and + * updates the fields in struct vgic_irq. + * If filter_vcpu is not NULL, applies only if the IRQ is targeting this + * VCPU. Unconditionally applies if filter_vcpu is NULL. + */ +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv) +{ + u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u8 prop; + int ret; + unsigned long flags; + + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); + + if (ret) + return ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { + irq->priority = LPI_PROP_PRIORITY(prop); + irq->enabled = LPI_PROP_ENABLE_BIT(prop); + + if (!irq->hw) { + vgic_queue_irq_unlock(kvm, irq, flags); + return 0; + } + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); + + return 0; +} + +/* + * Create a snapshot of the current LPIs targeting @vcpu, so that we can + * enumerate those LPIs without holding any lock. + * Returns their number and puts the kmalloc'ed array into intid_ptr. + */ +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + u32 *intids; + int irq_count, i = 0; + + /* + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. + */ + irq_count = READ_ONCE(dist->lpi_list_count); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); + if (!intids) + return -ENOMEM; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; + /* We don't need to "get" the IRQ, as we hold the list lock. */ + if (vcpu && irq->target_vcpu != vcpu) + continue; + intids[i++] = irq->intid; + } + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + *intid_ptr = intids; + return i; +} + +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ + int ret = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->target_vcpu = vcpu; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) { + struct its_vlpi_map map; + + ret = its_get_vlpi(irq->host_irq, &map); + if (ret) + return ret; + + if (map.vpe) + atomic_dec(&map.vpe->vlpi_count); + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + atomic_inc(&map.vpe->vlpi_count); + + ret = its_map_vlpi(irq->host_irq, &map); + } + + return ret; +} + +/* + * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI + * is targeting) to the VGIC's view, which deals with target VCPUs. + * Needs to be called whenever either the collection for a LPIs has + * changed or the collection itself got retargeted. + */ +static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) +{ + struct kvm_vcpu *vcpu; + + if (!its_is_collection_mapped(ite->collection)) + return; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + update_affinity(ite->irq, vcpu); +} + +/* + * Updates the target VCPU for every LPI targeting this collection. + * Must be called with the its_lock mutex held. + */ +static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, + struct its_collection *coll) +{ + struct its_device *device; + struct its_ite *ite; + + for_each_lpi_its(device, ite, its) { + if (ite->collection != coll) + continue; + + update_affinity_ite(kvm, ite); + } +} + +static u32 max_lpis_propbaser(u64 propbaser) +{ + int nr_idbits = (propbaser & 0x1f) + 1; + + return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); +} + +/* + * Sync the pending table pending bit of LPIs targeting @vcpu + * with our own data structures. This relies on the LPI being + * mapped before. + */ +static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) +{ + gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_irq *irq; + int last_byte_offset = -1; + int ret = 0; + u32 *intids; + int nr_irqs, i; + unsigned long flags; + u8 pendmask; + + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids); + if (nr_irqs < 0) + return nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + int byte_offset, bit_nr; + + byte_offset = intids[i] / BITS_PER_BYTE; + bit_nr = intids[i] % BITS_PER_BYTE; + + /* + * For contiguously allocated LPIs chances are we just read + * this very same byte in the last iteration. Reuse that. + */ + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); + if (ret) { + kfree(intids); + return ret; + } + last_byte_offset = byte_offset; + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } + + kfree(intids); + + return ret; +} + +static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 reg = GITS_TYPER_PLPIS; + + /* + * We use linear CPU numbers for redistributor addressing, + * so GITS_TYPER.PTA is 0. + * Also we force all PROPBASER registers to be the same, so + * CommonLPIAff is 0 as well. + * To avoid memory waste in the guest, we keep the number of IDBits and + * DevBits low - as least for the time being. + */ + reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT; + reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT; + reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT; + + return extract_bytes(reg, addr & 7, len); +} + +static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 val; + + val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK; + val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM; + return val; +} + +static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 rev = GITS_IIDR_REV(val); + + if (rev >= NR_ITS_ABIS) + return -EINVAL; + return vgic_its_set_abi(its, rev); +} + +static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GITS_PIDR0: + return 0x92; /* part number, bits[7:0] */ + case GITS_PIDR1: + return 0xb4; /* part number, bits[11:8] */ + case GITS_PIDR2: + return GIC_PIDR2_ARCH_GICv3 | 0x0b; + case GITS_PIDR4: + return 0x40; /* This is a 64K software visible page */ + /* The following are the ID registers for (any) GIC. */ + case GITS_CIDR0: + return 0x0d; + case GITS_CIDR1: + return 0xf0; + case GITS_CIDR2: + return 0x05; + case GITS_CIDR3: + return 0xb1; + } + + return 0; +} + +static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, + phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_translation_cache_entry *cte; + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + if (cte->db != db || cte->devid != devid || + cte->eventid != eventid) + continue; + + /* + * Move this entry to the head, as it is the most + * recently used. + */ + if (!list_is_first(&cte->entry, &dist->lpi_translation_cache)) + list_move(&cte->entry, &dist->lpi_translation_cache); + + return cte->irq; + } + + return NULL; +} + +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + irq = __vgic_its_check_cache(dist, db, devid, eventid); + if (irq) + vgic_get_irq_kref(irq); + + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, + struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + phys_addr_t db; + + /* Do not cache a directly injected interrupt */ + if (irq->hw) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + if (unlikely(list_empty(&dist->lpi_translation_cache))) + goto out; + + /* + * We could have raced with another CPU caching the same + * translation behind our back, so let's check it is not in + * already + */ + db = its->vgic_its_base + GITS_TRANSLATER; + if (__vgic_its_check_cache(dist, db, devid, eventid)) + goto out; + + /* Always reuse the last entry (LRU policy) */ + cte = list_last_entry(&dist->lpi_translation_cache, + typeof(*cte), entry); + + /* + * Caching the translation implies having an extra reference + * to the interrupt, so drop the potential reference on what + * was in the cache, and increment it on the new interrupt. + */ + if (cte->irq) + __vgic_put_lpi_locked(kvm, cte->irq); + + vgic_get_irq_kref(irq); + + cte->db = db; + cte->devid = devid; + cte->eventid = eventid; + cte->irq = irq; + + /* Move the new translation to the head of the list */ + list_move(&cte->entry, &dist->lpi_translation_cache); + +out: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_its_invalidate_cache(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + __vgic_put_lpi_locked(kvm, cte->irq); + cte->irq = NULL; + } + + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq) +{ + struct kvm_vcpu *vcpu; + struct its_ite *ite; + + if (!its->enabled) + return -EBUSY; + + ite = find_ite(its, devid, eventid); + if (!ite || !its_is_collection_mapped(ite->collection)) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + if (!vcpu) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + if (!vgic_lpis_enabled(vcpu)) + return -EBUSY; + + vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); + + *irq = ite->irq; + return 0; +} + +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) +{ + u64 address; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; + + if (!vgic_has_its(kvm)) + return ERR_PTR(-ENODEV); + + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return ERR_PTR(-EINVAL); + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); + + if (kvm_io_dev->ops != &kvm_io_gic_ops) + return ERR_PTR(-EINVAL); + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + if (iodev->iodev_type != IODEV_ITS) + return ERR_PTR(-EINVAL); + + return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. + */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct vgic_irq *irq = NULL; + unsigned long flags; + int err; + + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); + if (err) + return err; + + if (irq->hw) + return irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, true); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_irq *irq; + unsigned long flags; + phys_addr_t db; + + db = (u64)msi->address_hi << 32 | msi->address_lo; + irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); + if (!irq) + return -EWOULDBLOCK; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); + + return 0; +} + +/* + * Queries the KVM IO bus framework to get the ITS pointer from the given + * doorbell address. + * We then call vgic_its_trigger_msi() with the decoded data. + * According to the KVM_SIGNAL_MSI API description returns 1 on success. + */ +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_its *its; + int ret; + + if (!vgic_its_inject_cached_translation(kvm, msi)) + return 1; + + its = vgic_msi_to_its(kvm, msi); + if (IS_ERR(its)) + return PTR_ERR(its); + + mutex_lock(&its->its_lock); + ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); + mutex_unlock(&its->its_lock); + + if (ret < 0) + return ret; + + /* + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 + * if the guest has blocked the MSI. So we map any LPI mapping + * related error to that. + */ + if (ret) + return 0; + else + return 1; +} + +/* Requires the its_lock to be held. */ +static void its_free_ite(struct kvm *kvm, struct its_ite *ite) +{ + list_del(&ite->ite_list); + + /* This put matches the get in vgic_add_lpi. */ + if (ite->irq) { + if (ite->irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + + vgic_put_irq(kvm, ite->irq); + } + + kfree(ite); +} + +static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) +{ + return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); +} + +#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) +#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_size(cmd) (its_cmd_mask_field(cmd, 1, 0, 5) + 1) +#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) +#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) +#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_ittaddr(cmd) (its_cmd_mask_field(cmd, 2, 8, 44) << 8) +#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) +#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) + +/* + * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + ite = find_ite(its, device_id, event_id); + if (ite && its_is_collection_mapped(ite->collection)) { + /* + * Though the spec talks about removing the pending state, we + * don't bother here since we clear the ITTE anyway and the + * pending state is a property of the ITTE struct. + */ + vgic_its_invalidate_cache(kvm); + + its_free_ite(kvm, ite); + return 0; + } + + return E_ITS_DISCARD_UNMAPPED_INTERRUPT; +} + +/* + * The MOVI command moves an ITTE to a different collection. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct kvm_vcpu *vcpu; + struct its_ite *ite; + struct its_collection *collection; + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_MOVI_UNMAPPED_INTERRUPT; + + if (!its_is_collection_mapped(ite->collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + ite->collection = collection; + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + vgic_its_invalidate_cache(kvm); + + return update_affinity(ite->irq, vcpu); +} + +static bool __is_visible_gfn_locked(struct vgic_its *its, gpa_t gpa) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int idx; + bool ret; + + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; +} + +/* + * Check whether an ID can be stored into the corresponding guest table. + * For a direct table this is pretty easy, but gets a bit nasty for + * indirect tables. We check whether the resulting guest physical address + * is actually valid (covered by a memslot and guest accessible). + * For this we have to read the respective first level entry. + */ +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, + gpa_t *eaddr) +{ + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); + int esz = GITS_BASER_ENTRY_SIZE(baser); + int index; + + switch (type) { + case GITS_BASER_TYPE_DEVICE: + if (id >= BIT_ULL(VITS_TYPER_DEVBITS)) + return false; + break; + case GITS_BASER_TYPE_COLLECTION: + /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */ + if (id >= BIT_ULL(16)) + return false; + break; + default: + return false; + } + + if (!(baser & GITS_BASER_INDIRECT)) { + phys_addr_t addr; + + if (id >= (l1_tbl_size / esz)) + return false; + + addr = base + id * esz; + + if (eaddr) + *eaddr = addr; + + return __is_visible_gfn_locked(its, addr); + } + + /* calculate and check the index into the 1st level */ + index = id / (SZ_64K / esz); + if (index >= (l1_tbl_size / sizeof(u64))) + return false; + + /* Each 1st level entry is represented by a 64-bit value. */ + if (kvm_read_guest_lock(its->dev->kvm, + base + index * sizeof(indirect_ptr), + &indirect_ptr, sizeof(indirect_ptr))) + return false; + + indirect_ptr = le64_to_cpu(indirect_ptr); + + /* check the valid bit of the first level entry */ + if (!(indirect_ptr & BIT_ULL(63))) + return false; + + /* Mask the guest physical address and calculate the frame number. */ + indirect_ptr &= GENMASK_ULL(51, 16); + + /* Find the address of the actual entry */ + index = id % (SZ_64K / esz); + indirect_ptr += index * esz; + + if (eaddr) + *eaddr = indirect_ptr; + + return __is_visible_gfn_locked(its, indirect_ptr); +} + +/* + * Check whether an event ID can be stored in the corresponding Interrupt + * Translation Table, which starts at device->itt_addr. + */ +static bool vgic_its_check_event_id(struct vgic_its *its, struct its_device *device, + u32 event_id) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ite_esz = abi->ite_esz; + gpa_t gpa; + + /* max table size is: BIT_ULL(device->num_eventid_bits) * ite_esz */ + if (event_id >= BIT_ULL(device->num_eventid_bits)) + return false; + + gpa = device->itt_addr + event_id * ite_esz; + return __is_visible_gfn_locked(its, gpa); +} + +/* + * Add a new collection into the ITS collection table. + * Returns 0 on success, and a negative error value for generic errors. + */ +static int vgic_its_alloc_collection(struct vgic_its *its, + struct its_collection **colp, + u32 coll_id) +{ + struct its_collection *collection; + + collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT); + if (!collection) + return -ENOMEM; + + collection->collection_id = coll_id; + collection->target_addr = COLLECTION_NOT_MAPPED; + + list_add_tail(&collection->coll_list, &its->collection_list); + *colp = collection; + + return 0; +} + +static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) +{ + struct its_collection *collection; + struct its_device *device; + struct its_ite *ite; + + /* + * Clearing the mapping for that collection ID removes the + * entry from the list. If there wasn't any before, we can + * go home early. + */ + collection = find_collection(its, coll_id); + if (!collection) + return; + + for_each_lpi_its(device, ite, its) + if (ite->collection && + ite->collection->collection_id == coll_id) + ite->collection = NULL; + + list_del(&collection->coll_list); + kfree(collection); +} + +/* Must be called with its_lock mutex held */ +static struct its_ite *vgic_its_alloc_ite(struct its_device *device, + struct its_collection *collection, + u32 event_id) +{ + struct its_ite *ite; + + ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT); + if (!ite) + return ERR_PTR(-ENOMEM); + + ite->event_id = event_id; + ite->collection = collection; + + list_add_tail(&ite->ite_list, &device->itt_head); + return ite; +} + +/* + * The MAPTI and MAPI commands map LPIs to ITTEs. + * Must be called with its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_ite *ite; + struct kvm_vcpu *vcpu = NULL; + struct its_device *device; + struct its_collection *collection, *new_coll = NULL; + struct vgic_irq *irq; + int lpi_nr; + + device = find_its_device(its, device_id); + if (!device) + return E_ITS_MAPTI_UNMAPPED_DEVICE; + + if (!vgic_its_check_event_id(its, device, event_id)) + return E_ITS_MAPTI_ID_OOR; + + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) + lpi_nr = its_cmd_get_physical_id(its_cmd); + else + lpi_nr = event_id; + if (lpi_nr < GIC_LPI_OFFSET || + lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) + return E_ITS_MAPTI_PHYSICALID_OOR; + + /* If there is an existing mapping, behavior is UNPREDICTABLE. */ + if (find_ite(its, device_id, event_id)) + return 0; + + collection = find_collection(its, coll_id); + if (!collection) { + int ret; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + + ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + new_coll = collection; + } + + ite = vgic_its_alloc_ite(device, collection, event_id); + if (IS_ERR(ite)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + return PTR_ERR(ite); + } + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_nr, vcpu); + if (IS_ERR(irq)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + its_free_ite(kvm, ite); + return PTR_ERR(irq); + } + ite->irq = irq; + + return 0; +} + +/* Requires the its_lock to be held. */ +static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) +{ + struct its_ite *ite, *temp; + + /* + * The spec says that unmapping a device with still valid + * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, + * since we cannot leave the memory unreferenced. + */ + list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) + its_free_ite(kvm, ite); + + vgic_its_invalidate_cache(kvm); + + list_del(&device->dev_list); + kfree(device); +} + +/* its lock must be held */ +static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_device *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) + vgic_its_free_device(kvm, cur); +} + +/* its lock must be held */ +static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_collection *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list) + vgic_its_free_collection(its, cur->collection_id); +} + +/* Must be called with its_lock mutex held */ +static struct its_device *vgic_its_alloc_device(struct vgic_its *its, + u32 device_id, gpa_t itt_addr, + u8 num_eventid_bits) +{ + struct its_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT); + if (!device) + return ERR_PTR(-ENOMEM); + + device->device_id = device_id; + device->itt_addr = itt_addr; + device->num_eventid_bits = num_eventid_bits; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + return device; +} + +/* + * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + bool valid = its_cmd_get_validbit(its_cmd); + u8 num_eventid_bits = its_cmd_get_size(its_cmd); + gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); + struct its_device *device; + + if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL)) + return E_ITS_MAPD_DEVICE_OOR; + + if (valid && num_eventid_bits > VITS_TYPER_IDBITS) + return E_ITS_MAPD_ITTSIZE_OOR; + + device = find_its_device(its, device_id); + + /* + * The spec says that calling MAPD on an already mapped device + * invalidates all cached data for this device. We implement this + * by removing the mapping and re-establishing it. + */ + if (device) + vgic_its_free_device(kvm, device); + + /* + * The spec does not say whether unmapping a not-mapped device + * is an error, so we are done in any case. + */ + if (!valid) + return 0; + + device = vgic_its_alloc_device(its, device_id, itt_addr, + num_eventid_bits); + + return PTR_ERR_OR_ZERO(device); +} + +/* + * The MAPC command maps collection IDs to redistributors. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u16 coll_id; + u32 target_addr; + struct its_collection *collection; + bool valid; + + valid = its_cmd_get_validbit(its_cmd); + coll_id = its_cmd_get_collection(its_cmd); + target_addr = its_cmd_get_target_addr(its_cmd); + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MAPC_PROCNUM_OOR; + + if (!valid) { + vgic_its_free_collection(its, coll_id); + vgic_its_invalidate_cache(kvm); + } else { + collection = find_collection(its, coll_id); + + if (!collection) { + int ret; + + if (!vgic_its_check_id(its, its->baser_coll_table, + coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + + ret = vgic_its_alloc_collection(its, &collection, + coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + } else { + collection->target_addr = target_addr; + update_affinity_collection(kvm, its, collection); + } + } + + return 0; +} + +/* + * The CLEAR command removes the pending state for a particular LPI. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_CLEAR_UNMAPPED_INTERRUPT; + + ite->irq->pending_latch = false; + + if (ite->irq->hw) + return irq_set_irqchip_state(ite->irq->host_irq, + IRQCHIP_STATE_PENDING, false); + + return 0; +} + +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq) +{ + return update_lpi_config(kvm, irq, NULL, true); +} + +/* + * The INV command syncs the configuration bits from the memory table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_INV_UNMAPPED_INTERRUPT; + + return vgic_its_inv_lpi(kvm, ite->irq); +} + +/** + * vgic_its_invall - invalidate all LPIs targetting a given vcpu + * @vcpu: the vcpu for which the RD is targetted by an invalidation + * + * Contrary to the INVALL command, this targets a RD instead of a + * collection, and we don't need to hold the its_lock, since no ITS is + * involved here. + */ +int vgic_its_invall(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + int irq_count, i = 0; + u32 *intids; + + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu, false); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + + return 0; +} + +/* + * The INVALL command requests flushing of all IRQ data in this collection. + * Find the VCPU mapped to that collection, then iterate over the VM's list + * of mapped LPIs and update the configuration for each IRQ which targets + * the specified vcpu. The configuration will be read from the in-memory + * configuration table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_collection *collection; + struct kvm_vcpu *vcpu; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_INVALL_UNMAPPED_COLLECTION; + + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vgic_its_invall(vcpu); + + return 0; +} + +/* + * The MOVALL command moves the pending state of all IRQs targeting one + * redistributor to another. We don't hold the pending state in the VCPUs, + * but in the IRQs instead, so there is really not much to do for us here. + * However the spec says that no IRQ must target the old redistributor + * afterwards, so we make sure that no LPI is using the associated target_vcpu. + * This command affects all LPIs in the system that target that redistributor. + */ +static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 target1_addr = its_cmd_get_target_addr(its_cmd); + u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); + struct kvm_vcpu *vcpu1, *vcpu2; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + if (target1_addr >= atomic_read(&kvm->online_vcpus) || + target2_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MOVALL_PROCNUM_OOR; + + if (target1_addr == target2_addr) + return 0; + + vcpu1 = kvm_get_vcpu(kvm, target1_addr); + vcpu2 = kvm_get_vcpu(kvm, target2_addr); + + irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + + update_affinity(irq, vcpu2); + + vgic_put_irq(kvm, irq); + } + + vgic_its_invalidate_cache(kvm); + + kfree(intids); + return 0; +} + +/* + * The INT command injects the LPI associated with that DevID/EvID pair. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 msi_data = its_cmd_get_id(its_cmd); + u64 msi_devid = its_cmd_get_deviceid(its_cmd); + + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); +} + +/* + * This function is called with the its_cmd lock held, but the ITS data + * structure lock dropped. + */ +static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + int ret = -ENODEV; + + mutex_lock(&its->its_lock); + switch (its_cmd_get_command(its_cmd)) { + case GITS_CMD_MAPD: + ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); + break; + case GITS_CMD_MAPC: + ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); + break; + case GITS_CMD_MAPI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MAPTI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MOVI: + ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); + break; + case GITS_CMD_DISCARD: + ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); + break; + case GITS_CMD_CLEAR: + ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); + break; + case GITS_CMD_MOVALL: + ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); + break; + case GITS_CMD_INT: + ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); + break; + case GITS_CMD_INV: + ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); + break; + case GITS_CMD_INVALL: + ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); + break; + case GITS_CMD_SYNC: + /* we ignore this command: we are in sync all of the time */ + ret = 0; + break; + } + mutex_unlock(&its->its_lock); + + return ret; +} + +static u64 vgic_sanitise_its_baser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, + GITS_BASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, + GITS_BASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, + GITS_BASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* We support only one (ITS) page size: 64K */ + reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; + + return reg; +} + +static u64 vgic_sanitise_its_cbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, + GITS_CBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, + GITS_CBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, + GITS_CBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); + + return reg; +} + +static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cbaser, addr & 7, len); +} + +static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* When GITS_CTLR.Enable is 1, this register is RO. */ + if (its->enabled) + return; + + mutex_lock(&its->cmd_lock); + its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); + its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); + its->creadr = 0; + /* + * CWRITER is architecturally UNKNOWN on reset, but we need to reset + * it to CREADR to make sure we start with an empty command buffer. + */ + its->cwriter = its->creadr; + mutex_unlock(&its->cmd_lock); +} + +#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) +#define ITS_CMD_SIZE 32 +#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) + +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) +{ + gpa_t cbaser; + u64 cmd_buf[4]; + + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) + return; + + cbaser = GITS_CBASER_ADDRESS(its->cbaser); + + while (its->cwriter != its->creadr) { + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); + /* + * If kvm_read_guest() fails, this could be due to the guest + * programming a bogus value in CBASER or something else going + * wrong from which we cannot easily recover. + * According to section 6.3.2 in the GICv3 spec we can just + * ignore that command then. + */ + if (!ret) + vgic_its_handle_command(kvm, its, cmd_buf); + + its->creadr += ITS_CMD_SIZE; + if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) + its->creadr = 0; + } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + +static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cwriter, addr & 0x7, len); +} + +static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->creadr, addr & 0x7, len); +} + +static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 cmd_offset; + int ret = 0; + + mutex_lock(&its->cmd_lock); + + if (its->enabled) { + ret = -EBUSY; + goto out; + } + + cmd_offset = ITS_CMD_OFFSET(val); + if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + ret = -EINVAL; + goto out; + } + + its->creadr = cmd_offset; +out: + mutex_unlock(&its->cmd_lock); + return ret; +} + +#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) +static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg; + + switch (BASER_INDEX(addr)) { + case 0: + reg = its->baser_device_table; + break; + case 1: + reg = its->baser_coll_table; + break; + default: + reg = 0; + break; + } + + return extract_bytes(reg, addr & 7, len); +} + +#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) +static void vgic_mmio_write_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 entry_size, table_type; + u64 reg, *regptr, clearbits = 0; + + /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ + if (its->enabled) + return; + + switch (BASER_INDEX(addr)) { + case 0: + regptr = &its->baser_device_table; + entry_size = abi->dte_esz; + table_type = GITS_BASER_TYPE_DEVICE; + break; + case 1: + regptr = &its->baser_coll_table; + entry_size = abi->cte_esz; + table_type = GITS_BASER_TYPE_COLLECTION; + clearbits = GITS_BASER_INDIRECT; + break; + default: + return; + } + + reg = update_64bit_reg(*regptr, addr & 7, len, val); + reg &= ~GITS_BASER_RO_MASK; + reg &= ~clearbits; + + reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; + reg |= table_type << GITS_BASER_TYPE_SHIFT; + reg = vgic_sanitise_its_baser(reg); + + *regptr = reg; + + if (!(reg & GITS_BASER_VALID)) { + /* Take the its_lock to prevent a race with a save/restore */ + mutex_lock(&its->its_lock); + switch (table_type) { + case GITS_BASER_TYPE_DEVICE: + vgic_its_free_device_list(kvm, its); + break; + case GITS_BASER_TYPE_COLLECTION: + vgic_its_free_collection_list(kvm, its); + break; + } + mutex_unlock(&its->its_lock); + } +} + +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + /* + * It is UNPREDICTABLE to enable the ITS if any of the CBASER or + * device/collection BASER are invalid + */ + if (!its->enabled && (val & GITS_CTLR_ENABLE) && + (!(its->baser_device_table & GITS_BASER_VALID) || + !(its->baser_coll_table & GITS_BASER_VALID) || + !(its->cbaser & GITS_CBASER_VALID))) + goto out; + + its->enabled = !!(val & GITS_CTLR_ENABLE); + if (!its->enabled) + vgic_its_invalidate_cache(kvm); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + +out: + mutex_unlock(&its->cmd_lock); +} + +#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ +} + +#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ + .uaccess_its_write = uwr, \ +} + +static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static struct vgic_register_region its_registers[] = { + REGISTER_ITS_DESC(GITS_CTLR, + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_iidr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_TYPER, + vgic_mmio_read_its_typer, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CBASER, + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CWRITER, + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_creadr, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_BASER, + vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IDREGS_BASE, + vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, + VGIC_ACCESS_32bit), +}; + +/* This is called on setting the LPI enable bit in the redistributor. */ +void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) + its_sync_lpi_pending_table(vcpu); +} + +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its, + u64 addr) +{ + struct vgic_io_device *iodev = &its->iodev; + int ret; + + mutex_lock(&kvm->slots_lock); + if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -EBUSY; + goto out; + } + + its->vgic_its_base = addr; + iodev->regions = its_registers; + iodev->nr_regions = ARRAY_SIZE(its_registers); + kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); + + iodev->base_addr = its->vgic_its_base; + iodev->iodev_type = IODEV_ITS; + iodev->its = its; + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, + KVM_VGIC_V3_ITS_SIZE, &iodev->dev); +out: + mutex_unlock(&kvm->slots_lock); + + return ret; +} + +/* Default is 16 cached LPIs per vcpu */ +#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 + +void vgic_lpi_translation_cache_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned int sz; + int i; + + if (!list_empty(&dist->lpi_translation_cache)) + return; + + sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; + + for (i = 0; i < sz; i++) { + struct vgic_translation_cache_entry *cte; + + /* An allocation failure is not fatal */ + cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT); + if (WARN_ON(!cte)) + break; + + INIT_LIST_HEAD(&cte->entry); + list_add(&cte->entry, &dist->lpi_translation_cache); + } +} + +void vgic_lpi_translation_cache_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte, *tmp; + + vgic_its_invalidate_cache(kvm); + + list_for_each_entry_safe(cte, tmp, + &dist->lpi_translation_cache, entry) { + list_del(&cte->entry); + kfree(cte); + } +} + +#define INITIAL_BASER_VALUE \ + (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ + GITS_BASER_PAGE_SIZE_64K) + +#define INITIAL_PROPBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) + +static int vgic_its_create(struct kvm_device *dev, u32 type) +{ + int ret; + struct vgic_its *its; + + if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) + return -ENODEV; + + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT); + if (!its) + return -ENOMEM; + + mutex_lock(&dev->kvm->arch.config_lock); + + if (vgic_initialized(dev->kvm)) { + ret = vgic_v4_init(dev->kvm); + if (ret < 0) { + mutex_unlock(&dev->kvm->arch.config_lock); + kfree(its); + return ret; + } + + vgic_lpi_translation_cache_init(dev->kvm); + } + + mutex_init(&its->its_lock); + mutex_init(&its->cmd_lock); + + /* Yep, even more trickery for lock ordering... */ +#ifdef CONFIG_LOCKDEP + mutex_lock(&its->cmd_lock); + mutex_lock(&its->its_lock); + mutex_unlock(&its->its_lock); + mutex_unlock(&its->cmd_lock); +#endif + + its->vgic_its_base = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&its->device_list); + INIT_LIST_HEAD(&its->collection_list); + + dev->kvm->arch.vgic.msis_require_devid = true; + dev->kvm->arch.vgic.has_its = true; + its->enabled = false; + its->dev = dev; + + its->baser_device_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); + its->baser_coll_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); + dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; + + dev->private = its; + + ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1); + + mutex_unlock(&dev->kvm->arch.config_lock); + + return ret; +} + +static void vgic_its_destroy(struct kvm_device *kvm_dev) +{ + struct kvm *kvm = kvm_dev->kvm; + struct vgic_its *its = kvm_dev->private; + + mutex_lock(&its->its_lock); + + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); + + mutex_unlock(&its->its_lock); + kfree(its); + kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int vgic_its_has_attr_regs(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + gpa_t offset = attr->attr; + int align; + + align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7; + + if (offset & align) + return -EINVAL; + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) + return -ENXIO; + + return 0; +} + +static int vgic_its_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + const struct vgic_register_region *region; + struct vgic_its *its; + gpa_t addr, offset; + unsigned int len; + int align, ret = 0; + + its = dev->private; + offset = attr->attr; + + /* + * Although the spec supports upper/lower 32-bit accesses to + * 64-bit ITS registers, the userspace ABI requires 64-bit + * accesses to all 64-bit wide registers. We therefore only + * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID + * registers + */ + if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4)) + align = 0x3; + else + align = 0x7; + + if (offset & align) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -ENXIO; + goto out; + } + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) { + ret = -ENXIO; + goto out; + } + + addr = its->vgic_its_base + offset; + + len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; + + if (is_write) { + if (region->uaccess_its_write) + ret = region->uaccess_its_write(dev->kvm, its, addr, + len, *reg); + else + region->its_write(dev->kvm, its, addr, len, *reg); + } else { + *reg = region->its_read(dev->kvm, its, addr, len); + } +out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static u32 compute_next_devid_offset(struct list_head *h, + struct its_device *dev) +{ + struct its_device *next; + u32 next_offset; + + if (list_is_last(&dev->dev_list, h)) + return 0; + next = list_next_entry(dev, dev_list); + next_offset = next->device_id - dev->device_id; + + return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET); +} + +static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite) +{ + struct its_ite *next; + u32 next_offset; + + if (list_is_last(&ite->ite_list, h)) + return 0; + next = list_next_entry(ite, ite_list); + next_offset = next->event_id - ite->event_id; + + return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET); +} + +/** + * entry_fn_t - Callback called on a table entry restore path + * @its: its handle + * @id: id of the entry + * @entry: pointer to the entry + * @opaque: pointer to an opaque data + * + * Return: < 0 on error, 0 if last element was identified, id offset to next + * element otherwise + */ +typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, + void *opaque); + +/** + * scan_its_table - Scan a contiguous table in guest RAM and applies a function + * to each entry + * + * @its: its handle + * @base: base gpa of the table + * @size: size of the table in bytes + * @esz: entry size in bytes + * @start_id: the ID of the first entry in the table + * (non zero for 2d level tables) + * @fn: function to apply on each entry + * + * Return: < 0 on error, 0 if last element was identified, 1 otherwise + * (the last element may not be found on second level tables) + */ +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, + int start_id, entry_fn_t fn, void *opaque) +{ + struct kvm *kvm = its->dev->kvm; + unsigned long len = size; + int id = start_id; + gpa_t gpa = base; + char entry[ESZ_MAX]; + int ret; + + memset(entry, 0, esz); + + while (true) { + int next_offset; + size_t byte_offset; + + ret = kvm_read_guest_lock(kvm, gpa, entry, esz); + if (ret) + return ret; + + next_offset = fn(its, id, entry, opaque); + if (next_offset <= 0) + return next_offset; + + byte_offset = next_offset * esz; + if (byte_offset >= len) + break; + + id += next_offset; + gpa += byte_offset; + len -= byte_offset; + } + return 1; +} + +/** + * vgic_its_save_ite - Save an interrupt translation entry at @gpa + */ +static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, + struct its_ite *ite, gpa_t gpa, int ite_esz) +{ + struct kvm *kvm = its->dev->kvm; + u32 next_offset; + u64 val; + + next_offset = compute_next_eventid_offset(&dev->itt_head, ite); + val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | + ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | + ite->collection->collection_id; + val = cpu_to_le64(val); + return vgic_write_guest_lock(kvm, gpa, &val, ite_esz); +} + +/** + * vgic_its_restore_ite - restore an interrupt translation entry + * @event_id: id used for indexing + * @ptr: pointer to the ITE entry + * @opaque: pointer to the its_device + */ +static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, + void *ptr, void *opaque) +{ + struct its_device *dev = opaque; + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + struct kvm_vcpu *vcpu = NULL; + u64 val; + u64 *p = (u64 *)ptr; + struct vgic_irq *irq; + u32 coll_id, lpi_id; + struct its_ite *ite; + u32 offset; + + val = *p; + + val = le64_to_cpu(val); + + coll_id = val & KVM_ITS_ITE_ICID_MASK; + lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT; + + if (!lpi_id) + return 1; /* invalid entry, no choice but to scan next entry */ + + if (lpi_id < VGIC_MIN_LPI) + return -EINVAL; + + offset = val >> KVM_ITS_ITE_NEXT_SHIFT; + if (event_id + offset >= BIT_ULL(dev->num_eventid_bits)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (!collection) + return -EINVAL; + + if (!vgic_its_check_event_id(its, dev, event_id)) + return -EINVAL; + + ite = vgic_its_alloc_ite(dev, collection, event_id); + if (IS_ERR(ite)) + return PTR_ERR(ite); + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_id, vcpu); + if (IS_ERR(irq)) { + its_free_ite(kvm, ite); + return PTR_ERR(irq); + } + ite->irq = irq; + + return offset; +} + +static int vgic_its_ite_cmp(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct its_ite *itea = container_of(a, struct its_ite, ite_list); + struct its_ite *iteb = container_of(b, struct its_ite, ite_list); + + if (itea->event_id < iteb->event_id) + return -1; + else + return 1; +} + +static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = device->itt_addr; + struct its_ite *ite; + int ret; + int ite_esz = abi->ite_esz; + + list_sort(NULL, &device->itt_head, vgic_its_ite_cmp); + + list_for_each_entry(ite, &device->itt_head, ite_list) { + gpa_t gpa = base + ite->event_id * ite_esz; + + /* + * If an LPI carries the HW bit, this means that this + * interrupt is controlled by GICv4, and we do not + * have direct access to that state without GICv4.1. + * Let's simply fail the save operation... + */ + if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1) + return -EACCES; + + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_its_restore_itt - restore the ITT of a device + * + * @its: its handle + * @dev: device handle + * + * Return 0 on success, < 0 on error + */ +static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = dev->itt_addr; + int ret; + int ite_esz = abi->ite_esz; + size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz; + + ret = scan_its_table(its, base, max_size, ite_esz, 0, + vgic_its_restore_ite, dev); + + /* scan_its_table returns +1 if all ITEs are invalid */ + if (ret > 0) + ret = 0; + + return ret; +} + +/** + * vgic_its_save_dte - Save a device table entry at a given GPA + * + * @its: ITS handle + * @dev: ITS device + * @ptr: GPA + */ +static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, + gpa_t ptr, int dte_esz) +{ + struct kvm *kvm = its->dev->kvm; + u64 val, itt_addr_field; + u32 next_offset; + + itt_addr_field = dev->itt_addr >> 8; + next_offset = compute_next_devid_offset(&its->device_list, dev); + val = (1ULL << KVM_ITS_DTE_VALID_SHIFT | + ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) | + (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | + (dev->num_eventid_bits - 1)); + val = cpu_to_le64(val); + return vgic_write_guest_lock(kvm, ptr, &val, dte_esz); +} + +/** + * vgic_its_restore_dte - restore a device table entry + * + * @its: its handle + * @id: device id the DTE corresponds to + * @ptr: kernel VA where the 8 byte DTE is located + * @opaque: unused + * + * Return: < 0 on error, 0 if the dte is the last one, id offset to the + * next dte otherwise + */ +static int vgic_its_restore_dte(struct vgic_its *its, u32 id, + void *ptr, void *opaque) +{ + struct its_device *dev; + u64 baser = its->baser_device_table; + gpa_t itt_addr; + u8 num_eventid_bits; + u64 entry = *(u64 *)ptr; + bool valid; + u32 offset; + int ret; + + entry = le64_to_cpu(entry); + + valid = entry >> KVM_ITS_DTE_VALID_SHIFT; + num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1; + itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK) + >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8; + + if (!valid) + return 1; + + /* dte entry is valid */ + offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + + if (!vgic_its_check_id(its, baser, id, NULL)) + return -EINVAL; + + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = vgic_its_restore_itt(its, dev); + if (ret) { + vgic_its_free_device(its->dev->kvm, dev); + return ret; + } + + return offset; +} + +static int vgic_its_device_cmp(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct its_device *deva = container_of(a, struct its_device, dev_list); + struct its_device *devb = container_of(b, struct its_device, dev_list); + + if (deva->device_id < devb->device_id) + return -1; + else + return 1; +} + +/** + * vgic_its_save_device_tables - Save the device table and all ITT + * into guest RAM + * + * L1/L2 handling is hidden by vgic_its_check_id() helper which directly + * returns the GPA of the device entry + */ +static int vgic_its_save_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + struct its_device *dev; + int dte_esz = abi->dte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + list_sort(NULL, &its->device_list, vgic_its_device_cmp); + + list_for_each_entry(dev, &its->device_list, dev_list) { + int ret; + gpa_t eaddr; + + if (!vgic_its_check_id(its, baser, + dev->device_id, &eaddr)) + return -EINVAL; + + ret = vgic_its_save_itt(its, dev); + if (ret) + return ret; + + ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * handle_l1_dte - callback used for L1 device table entries (2 stage case) + * + * @its: its handle + * @id: index of the entry in the L1 table + * @addr: kernel VA + * @opaque: unused + * + * L1 table entries are scanned by steps of 1 entry + * Return < 0 if error, 0 if last dte was found when scanning the L2 + * table, +1 otherwise (meaning next L1 entry must be scanned) + */ +static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, + void *opaque) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int l2_start_id = id * (SZ_64K / abi->dte_esz); + u64 entry = *(u64 *)addr; + int dte_esz = abi->dte_esz; + gpa_t gpa; + int ret; + + entry = le64_to_cpu(entry); + + if (!(entry & KVM_ITS_L1E_VALID_MASK)) + return 1; + + gpa = entry & KVM_ITS_L1E_ADDR_MASK; + + ret = scan_its_table(its, gpa, SZ_64K, dte_esz, + l2_start_id, vgic_its_restore_dte, NULL); + + return ret; +} + +/** + * vgic_its_restore_device_tables - Restore the device table and all ITT + * from guest RAM to internal data structs + */ +static int vgic_its_restore_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + int l1_esz, ret; + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + gpa_t l1_gpa; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); + + if (baser & GITS_BASER_INDIRECT) { + l1_esz = GITS_LVL1_ENTRY_SIZE; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + handle_l1_dte, NULL); + } else { + l1_esz = abi->dte_esz; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + vgic_its_restore_dte, NULL); + } + + /* scan_its_table returns +1 if all entries are invalid */ + if (ret > 0) + ret = 0; + + if (ret < 0) + vgic_its_free_device_list(its->dev->kvm, its); + + return ret; +} + +static int vgic_its_save_cte(struct vgic_its *its, + struct its_collection *collection, + gpa_t gpa, int esz) +{ + u64 val; + + val = (1ULL << KVM_ITS_CTE_VALID_SHIFT | + ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | + collection->collection_id); + val = cpu_to_le64(val); + return vgic_write_guest_lock(its->dev->kvm, gpa, &val, esz); +} + +/* + * Restore a collection entry into the ITS collection table. + * Return +1 on success, 0 if the entry was invalid (which should be + * interpreted as end-of-table), and a negative error value for generic errors. + */ +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +{ + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + u32 target_addr, coll_id; + u64 val; + int ret; + + BUG_ON(esz > sizeof(val)); + ret = kvm_read_guest_lock(kvm, gpa, &val, esz); + if (ret) + return ret; + val = le64_to_cpu(val); + if (!(val & KVM_ITS_CTE_VALID_MASK)) + return 0; + + target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); + coll_id = val & KVM_ITS_CTE_ICID_MASK; + + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (collection) + return -EEXIST; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return -EINVAL; + + ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + return 1; +} + +/** + * vgic_its_save_collection_table - Save the collection table into + * guest RAM + */ +static int vgic_its_save_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); + struct its_collection *collection; + u64 val; + size_t max_size, filled = 0; + int ret, cte_esz = abi->cte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + if (ret) + return ret; + gpa += cte_esz; + filled += cte_esz; + } + + if (filled == max_size) + return 0; + + /* + * table is not fully filled, add a last dummy element + * with valid bit unset + */ + val = 0; + BUG_ON(cte_esz > sizeof(val)); + ret = vgic_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); + return ret; +} + +/** + * vgic_its_restore_collection_table - reads the collection table + * in guest memory and restores the ITS internal state. Requires the + * BASER registers to be restored before. + */ +static int vgic_its_restore_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + int cte_esz = abi->cte_esz; + size_t max_size, read = 0; + gpa_t gpa; + int ret; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + gpa = GITS_BASER_ADDR_48_to_52(baser); + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + while (read < max_size) { + ret = vgic_its_restore_cte(its, gpa, cte_esz); + if (ret <= 0) + break; + gpa += cte_esz; + read += cte_esz; + } + + if (ret > 0) + return 0; + + if (ret < 0) + vgic_its_free_collection_list(its->dev->kvm, its); + + return ret; +} + +/** + * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM + * according to v0 ABI + */ +static int vgic_its_save_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_save_device_tables(its); + if (ret) + return ret; + + return vgic_its_save_collection_table(its); +} + +/** + * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM + * to internal data structs according to V0 ABI + * + */ +static int vgic_its_restore_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_restore_collection_table(its); + if (ret) + return ret; + + ret = vgic_its_restore_device_tables(its); + if (ret) + vgic_its_free_collection_list(its->dev->kvm, its); + return ret; +} + +static int vgic_its_commit_v0(struct vgic_its *its) +{ + const struct vgic_its_abi *abi; + + abi = vgic_its_get_abi(its); + its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + + its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + + its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + return 0; +} + +static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its) +{ + /* We need to keep the ABI specific field values */ + its->baser_coll_table &= ~GITS_BASER_VALID; + its->baser_device_table &= ~GITS_BASER_VALID; + its->cbaser = 0; + its->creadr = 0; + its->cwriter = 0; + its->enabled = 0; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); +} + +static int vgic_its_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_ITS_ADDR_TYPE: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_ITS_CTRL_RESET: + return 0; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + return 0; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: + return vgic_its_has_attr_regs(dev, attr); + } + return -ENXIO; +} + +static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ret = 0; + + if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ + return 0; + + mutex_lock(&kvm->lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + mutex_lock(&kvm->arch.config_lock); + mutex_lock(&its->its_lock); + + switch (attr) { + case KVM_DEV_ARM_ITS_CTRL_RESET: + vgic_its_reset(kvm, its); + break; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + ret = abi->save_tables(its); + break; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + ret = abi->restore_tables(its); + break; + } + + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->arch.config_lock); + unlock_all_vcpus(kvm); + mutex_unlock(&kvm->lock); + return ret; +} + +/* + * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory + * without the running VCPU when dirty ring is enabled. + * + * The running VCPU is required to track dirty guest pages when dirty ring + * is enabled. Otherwise, the backup bitmap should be used to track the + * dirty guest pages. When vgic/its tables are being saved, the backup + * bitmap is used to track the dirty guest pages due to the missed running + * VCPU in the period. + */ +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + return dist->table_write_in_progress; +} + +static int vgic_its_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_its *its = dev->private; + int ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + u64 addr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + ret = vgic_check_iorange(dev->kvm, its->vgic_its_base, + addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE); + if (ret) + return ret; + + return vgic_register_its_iodev(dev->kvm, its, addr); + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + return vgic_its_ctrl(dev->kvm, its, attr->attr); + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_its_attr_regs_access(dev, attr, ®, true); + } + } + return -ENXIO; +} + +static int vgic_its_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + struct vgic_its *its = dev->private; + u64 addr = its->vgic_its_base; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + } + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + int ret; + + ret = vgic_its_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + default: + return -ENXIO; + } + + return 0; +} + +static struct kvm_device_ops kvm_arm_vgic_its_ops = { + .name = "kvm-arm-vgic-its", + .create = vgic_its_create, + .destroy = vgic_its_destroy, + .set_attr = vgic_its_set_attr, + .get_attr = vgic_its_get_attr, + .has_attr = vgic_its_has_attr, +}; + +int kvm_vgic_register_its_device(void) +{ + return kvm_register_device_ops(&kvm_arm_vgic_its_ops, + KVM_DEV_TYPE_ARM_VGIC_ITS); +} diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c new file mode 100644 index 0000000000..212b73a715 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC: KVM DEVICE API + * + * Copyright (C) 2015 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <linux/uaccess.h> +#include <asm/kvm_mmu.h> +#include <asm/cputype.h> +#include "vgic.h" + +/* common helpers */ + +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size) +{ + if (!IS_VGIC_ADDR_UNDEF(ioaddr)) + return -EEXIST; + + if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment)) + return -EINVAL; + + if (addr + size < addr) + return -EINVAL; + + if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) + return -E2BIG; + + return 0; +} + +static int vgic_check_type(struct kvm *kvm, int type_needed) +{ + if (kvm->arch.vgic.vgic_model != type_needed) + return -ENODEV; + else + return 0; +} + +int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) +{ + struct vgic_dist *vgic = &kvm->arch.vgic; + int r; + + mutex_lock(&kvm->arch.config_lock); + switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_dist_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_DIST_SIZE); + if (!r) + vgic->vgic_dist_base = dev_addr->addr; + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_cpu_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_CPU_SIZE); + if (!r) + vgic->vgic_cpu_base = dev_addr->addr; + break; + default: + r = -ENODEV; + } + + mutex_unlock(&kvm->arch.config_lock); + + return r; +} + +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @attr: pointer to the attribute being retrieved/updated + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. + * Check them for sanity (alignment, double assignment). We can't check for + * overlapping regions in case of a virtual GICv3 here, since we don't know + * the number of VCPUs yet, so we defer this check to map_resources(). + */ +static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool write) +{ + u64 __user *uaddr = (u64 __user *)attr->addr; + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t *addr_ptr, alignment, size; + u64 undef_value = VGIC_ADDR_UNDEF; + u64 addr; + int r; + + /* Reading a redistributor region addr implies getting the index */ + if (write || attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION) + if (get_user(addr, uaddr)) + return -EFAULT; + + /* + * Since we can't hold config_lock while registering the redistributor + * iodevs, take the slots_lock immediately. + */ + mutex_lock(&kvm->slots_lock); + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_4K; + size = KVM_VGIC_V2_DIST_SIZE; + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_cpu_base; + alignment = SZ_4K; + size = KVM_VGIC_V2_CPU_SIZE; + break; + case KVM_VGIC_V3_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_64K; + size = KVM_VGIC_V3_DIST_SIZE; + break; + case KVM_VGIC_V3_ADDR_TYPE_REDIST: { + struct vgic_redist_region *rdreg; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + if (write) { + r = vgic_v3_set_redist_base(kvm, 0, addr, 0); + goto out; + } + rdreg = list_first_entry_or_null(&vgic->rd_regions, + struct vgic_redist_region, list); + if (!rdreg) + addr_ptr = &undef_value; + else + addr_ptr = &rdreg->base; + break; + } + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + { + struct vgic_redist_region *rdreg; + u8 index; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + + index = addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + + if (write) { + gpa_t base = addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = FIELD_GET(KVM_VGIC_V3_RDIST_COUNT_MASK, addr); + u8 flags = FIELD_GET(KVM_VGIC_V3_RDIST_FLAGS_MASK, addr); + + if (!count || flags) + r = -EINVAL; + else + r = vgic_v3_set_redist_base(kvm, index, + base, count); + goto out; + } + + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + if (!rdreg) { + r = -ENOENT; + goto out; + } + + addr = index; + addr |= rdreg->base; + addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + goto out; + } + default: + r = -ENODEV; + } + + if (r) + goto out; + + mutex_lock(&kvm->arch.config_lock); + if (write) { + r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); + if (!r) + *addr_ptr = addr; + } else { + addr = *addr_ptr; + } + mutex_unlock(&kvm->arch.config_lock); + +out: + mutex_unlock(&kvm->slots_lock); + + if (!r && !write) + r = put_user(addr, uaddr); + + return r; +} + +static int vgic_set_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, true); + return (r == -ENODEV) ? -ENXIO : r; + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_RESERVED || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->arch.config_lock); + + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_spis = + val - VGIC_NR_PRIVATE_IRQS; + + mutex_unlock(&dev->kvm->arch.config_lock); + + return ret; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + mutex_lock(&dev->kvm->arch.config_lock); + r = vgic_init(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); + return r; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + /* + * OK, this one isn't common at all, but we + * want to handle all control group attributes + * in a single place. + */ + if (vgic_check_type(dev->kvm, KVM_DEV_TYPE_ARM_VGIC_V3)) + return -ENXIO; + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + r = vgic_v3_save_pending_tables(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return r; + } + break; + } + } + + return -ENXIO; +} + +static int vgic_get_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, false); + return (r == -ENODEV) ? -ENXIO : r; + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + + r = put_user(dev->kvm->arch.vgic.nr_spis + + VGIC_NR_PRIVATE_IRQS, uaddr); + break; + } + } + + return r; +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm, type); +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +int kvm_register_vgic_device(unsigned long type) +{ + int ret = -ENODEV; + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, + KVM_DEV_TYPE_ARM_VGIC_V3); + + if (ret) + break; + ret = kvm_vgic_register_its_device(); + break; + } + + return ret; +} + +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + int cpuid; + + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) + return -EINVAL; + + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/** + * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @is_write: true if userspace is writing a register + */ +static int vgic_v2_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + u32 val; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + if (is_write) + if (get_user(val, uaddr)) + return -EFAULT; + + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + + ret = vgic_init(dev->kvm); + if (ret) + goto out; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, &val); + break; + default: + ret = -EINVAL; + break; + } + +out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + + if (!ret && !is_write) + ret = put_user(val, uaddr); + + return ret; +} + +static int vgic_v2_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, true); + default: + return vgic_set_common_attr(dev, attr); + } +} + +static int vgic_v2_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); + } +} + +static int vgic_v2_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic-v2", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v2_set_attr, + .get_attr = vgic_v2_get_attr, + .has_attr = vgic_v2_has_attr, +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + unsigned long vgic_mpidr, mpidr_reg; + + /* + * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group, + * attr might not hold MPIDR. Hence assume vcpu0. + */ + if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) { + vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >> + KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT; + + mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr); + reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg); + } else { + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0); + } + + if (!reg_attr->vcpu) + return -EINVAL; + + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* + * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @is_write: true if userspace is writing a register + */ +static int vgic_v3_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + bool uaccess; + u32 val; + int ret; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + /* Sysregs uaccess is performed by the sysreg handling code */ + uaccess = false; + break; + default: + uaccess = true; + } + + if (uaccess && is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + if (get_user(val, uaddr)) + return -EFAULT; + } + + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val); + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &val); + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write); + break; + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + unsigned int info, intid; + + info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; + if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { + intid = attr->attr & + KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; + ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, + intid, &val); + } else { + ret = -EINVAL; + } + break; + } + default: + ret = -EINVAL; + break; + } + +out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + + if (!ret && uaccess && !is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + ret = put_user(val, uaddr); + } + + return ret; +} + +static int vgic_v3_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, true); + default: + return vgic_set_common_attr(dev, attr); + } +} + +static int vgic_v3_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); + } +} + +static int vgic_v3_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V3_ADDR_TYPE_DIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) == + VGIC_LEVEL_INFO_LINE_LEVEL) + return 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v3_ops = { + .name = "kvm-arm-vgic-v3", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v3_set_attr, + .get_attr = vgic_v3_get_attr, + .has_attr = vgic_v3_has_attr, +}; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c new file mode 100644 index 0000000000..e070cda86e --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv2 MMIO handling functions + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/nospec.h> + +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 1: Report GICv2 interrupts as group 0 instead of group 1 + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + +static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + value = vgic->enabled ? GICD_ENABLE : 0; + break; + case GIC_DIST_CTR: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + break; + case GIC_DIST_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + bool was_enabled = dist->enabled; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + dist->enabled = val & GICD_ENABLE; + if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + break; + case GIC_DIST_CTR: + case GIC_DIST_IIDR: + /* Nothing to do */ + return; + } +} + +static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u32 reg; + + switch (addr & 0x0c) { + case GIC_DIST_IIDR: + reg = vgic_mmio_read_v2_misc(vcpu, addr, len); + if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) + return -EINVAL; + + /* + * If we observe a write to GICD_IIDR we know that userspace + * has been updated and has had a chance to cope with older + * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting + * interrupts as group 1, and therefore we now allow groups to + * be user writable. Doing this by default would break + * migration from old kernels to new kernels with legacy + * userspace. + */ + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: + vcpu->kvm->arch.vgic.v2_groups_user_writable = true; + dist->implementation_rev = reg; + return 0; + default: + return -EINVAL; + } + } + + vgic_mmio_write_v2_misc(vcpu, addr, len, val); + return 0; +} + +static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + if (vcpu->kvm->arch.vgic.v2_groups_user_writable) + vgic_mmio_write_group(vcpu, addr, len, val); + + return 0; +} + +static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); + int intid = val & 0xf; + int targets = (val >> 16) & 0xff; + int mode = (val >> 24) & 0x03; + struct kvm_vcpu *vcpu; + unsigned long flags, c; + + switch (mode) { + case 0x0: /* as specified by targets */ + break; + case 0x1: + targets = (1U << nr_vcpus) - 1; /* all, ... */ + targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ + break; + case 0x2: /* this very vCPU only */ + targets = (1U << source_vcpu->vcpu_id); + break; + case 0x3: /* reserved */ + return; + } + + kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { + struct vgic_irq *irq; + + if (!(targets & (1U << c))) + continue; + + irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + irq->source |= 1U << source_vcpu->vcpu_id; + + vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags); + vgic_put_irq(source_vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->targets << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); + int i; + unsigned long flags; + + /* GICD_ITARGETSR[0-7] are read-only */ + if (intid < VGIC_NR_PRIVATE_IRQS) + return; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + int target; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->targets = (val >> (i * 8)) & cpu_mask; + target = irq->targets ? __ffs(irq->targets) : 0; + irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = addr & 0x0f; + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->source << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + return val; +} + +static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source &= ~((val >> (i * 8)) & 0xff); + if (!irq->source) + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source |= (val >> (i * 8)) & 0xff; + + if (irq->source) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + vgic_put_irq(vcpu->kvm, irq); + } +} + +#define GICC_ARCH_VERSION_V2 0x2 + +/* These are for userland accesses only, there is no guest-facing emulation. */ +static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_vmcr vmcr; + u32 val; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT; + val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT; + val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT; + val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT; + val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT; + val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT; + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; + break; + case GIC_CPU_BINPOINT: + val = vmcr.bpr; + break; + case GIC_CPU_ALIAS_BINPOINT: + val = vmcr.abpr; + break; + case GIC_CPU_IDENT: + val = ((PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + IMPLEMENTER_ARM); + break; + default: + return 0; + } + + return val; +} + +static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0); + vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1); + vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl); + vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn); + vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR); + vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS); + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; + break; + case GIC_CPU_BINPOINT: + vmcr.bpr = val; + break; + case GIC_CPU_ALIAS_BINPOINT: + vmcr.abpr = val; + break; + } + + vgic_set_vmcr(vcpu, &vmcr); +} + +static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return 0; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return 0; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + return vgicv3->vgic_ap1r[n]; + } +} + +static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return; + vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + vgicv3->vgic_ap1r[n] = val; + } +} + +static const struct vgic_register_region vgic_v2_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL, + vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, + NULL, vgic_mmio_uaccess_write_v2_misc, + 12, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, + vgic_mmio_read_group, vgic_mmio_write_group, + NULL, vgic_mmio_uaccess_write_v2_group, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, + vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), +}; + +static const struct vgic_register_region vgic_v2_cpu_registers[] = { + REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, + vgic_mmio_read_apr, vgic_mmio_write_apr, 16, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) +{ + dev->regions = vgic_v2_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_4K; +} + +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v2_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + iodev.regions = vgic_v2_cpu_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); + iodev.base_addr = 0; + break; + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} + +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_cpu_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), + .iodev_type = IODEV_CPUIF, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), + .iodev_type = IODEV_DIST, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c new file mode 100644 index 0000000000..ae5a3a7176 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv3 MMIO handling functions + */ + +#include <linux/bitfield.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* extract @num bytes at @offset bytes offset in data */ +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num) +{ + return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); +} + +/* allows updates of any half of a 64-bit register (or the whole thing) */ +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val) +{ + int lower = (offset & 4) * 8; + int upper = lower + 8 * len - 1; + + reg &= ~GENMASK_ULL(upper, lower); + val &= GENMASK_ULL(len * 8 - 1, 0); + + return reg | ((u64)val << lower); +} + +bool vgic_has_its(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + return dist->has_its; +} + +bool vgic_supports_direct_msis(struct kvm *kvm) +{ + return (kvm_vgic_global_state.has_gicv4_1 || + (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm))); +} + +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + +static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value = 0; + + switch (addr & 0x0c) { + case GICD_CTLR: + if (vgic->enabled) + value |= GICD_CTLR_ENABLE_SS_G1; + value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; + if (vgic->nassgireq) + value |= GICD_CTLR_nASSGIreq; + break; + case GICD_TYPER: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + if (vgic_has_its(vcpu->kvm)) { + value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; + value |= GICD_TYPER_LPIS; + } else { + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + } + break; + case GICD_TYPER2: + if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi()) + value = GICD_TYPER2_nASSGIcap; + break; + case GICD_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (addr & 0x0c) { + case GICD_CTLR: { + bool was_enabled, is_hwsgi; + + mutex_lock(&vcpu->kvm->arch.config_lock); + + was_enabled = dist->enabled; + is_hwsgi = dist->nassgireq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi()) + val &= ~GICD_CTLR_nASSGIreq; + + /* Dist stays enabled? nASSGIreq is RO */ + if (was_enabled && dist->enabled) { + val &= ~GICD_CTLR_nASSGIreq; + val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi); + } + + /* Switching HW SGIs? */ + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + if (is_hwsgi != dist->nassgireq) + vgic_v4_configure_vsgis(vcpu->kvm); + + if (kvm_vgic_global_state.has_gicv4_1 && + was_enabled != dist->enabled) + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4); + else if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + + mutex_unlock(&vcpu->kvm->arch.config_lock); + break; + } + case GICD_TYPER: + case GICD_TYPER2: + case GICD_IIDR: + /* This is at best for documentation purposes... */ + return; + } +} + +static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u32 reg; + + switch (addr & 0x0c) { + case GICD_TYPER2: + if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) + return -EINVAL; + return 0; + case GICD_IIDR: + reg = vgic_mmio_read_v3_misc(vcpu, addr, len); + if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) + return -EINVAL; + + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + switch (reg) { + case KVM_VGIC_IMP_REV_2: + case KVM_VGIC_IMP_REV_3: + dist->implementation_rev = reg; + return 0; + default: + return -EINVAL; + } + case GICD_CTLR: + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + return 0; + } + + vgic_mmio_write_v3_misc(vcpu, addr, len, val); + return 0; +} + +static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + unsigned long ret = 0; + + if (!irq) + return 0; + + /* The upper word is RAZ for us. */ + if (!(addr & 4)) + ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + +static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq; + unsigned long flags; + + /* The upper word is WI for us since we don't implement Aff3. */ + if (addr & 4) + return; + + irq = vgic_get_irq(vcpu->kvm, NULL, intid); + + if (!irq) + return; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* We only care about and preserve Aff0, Aff1 and Aff2. */ + irq->mpidr = val & GENMASK(23, 0); + irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); +} + +bool vgic_lpis_enabled(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return atomic_read(&vgic_cpu->ctlr) == GICR_CTLR_ENABLE_LPIS; +} + +static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + unsigned long val; + + val = atomic_read(&vgic_cpu->ctlr); + if (vgic_get_implementation_rev(vcpu) >= KVM_VGIC_IMP_REV_3) + val |= GICR_CTLR_IR | GICR_CTLR_CES; + + return val; +} + +static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 ctlr; + + if (!vgic_has_its(vcpu->kvm)) + return; + + if (!(val & GICR_CTLR_ENABLE_LPIS)) { + /* + * Don't disable if RWP is set, as there already an + * ongoing disable. Funky guest... + */ + ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, + GICR_CTLR_ENABLE_LPIS, + GICR_CTLR_RWP); + if (ctlr != GICR_CTLR_ENABLE_LPIS) + return; + + vgic_flush_pending_lpis(vcpu); + vgic_its_invalidate_cache(vcpu->kvm); + atomic_set_release(&vgic_cpu->ctlr, 0); + } else { + ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0, + GICR_CTLR_ENABLE_LPIS); + if (ctlr != 0) + return; + + vgic_enable_lpis(vcpu); + } +} + +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_redist_region *iter, *rdreg = vgic_cpu->rdreg; + + if (!rdreg) + return false; + + if (vgic_cpu->rdreg_index < rdreg->free_index - 1) { + return false; + } else if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) { + struct list_head *rd_regions = &vgic->rd_regions; + gpa_t end = rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE; + + /* + * the rdist is the last one of the redist region, + * check whether there is no other contiguous rdist region + */ + list_for_each_entry(iter, rd_regions, list) { + if (iter->base == end && iter->free_index > 0) + return false; + } + } + return true; +} + +static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + int target_vcpu_id = vcpu->vcpu_id; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + if (vgic_mmio_vcpu_rdist_is_last(vcpu)) + value |= GICR_TYPER_LAST; + + return extract_bytes(value, addr & 7, len); +} + +static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); +} + +static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GICD_PIDR2: + /* report a GICv3 compliant implementation */ + return 0x3b; + } + + return 0; +} + +static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * pending_latch is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before pending info. + */ + irq->pending_latch = test_bit(i, &val); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + irq->pending_latch = false; + } + + if (irq->pending_latch) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* We want to avoid outer shareable. */ +u64 vgic_sanitise_shareability(u64 field) +{ + switch (field) { + case GIC_BASER_OuterShareable: + return GIC_BASER_InnerShareable; + default: + return field; + } +} + +/* Avoid any inner non-cacheable mapping. */ +u64 vgic_sanitise_inner_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_nCnB: + case GIC_BASER_CACHE_nC: + return GIC_BASER_CACHE_RaWb; + default: + return field; + } +} + +/* Non-cacheable or same-as-inner are OK. */ +u64 vgic_sanitise_outer_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_SameAsInner: + case GIC_BASER_CACHE_nC: + return field; + default: + return GIC_BASER_CACHE_SameAsInner; + } +} + +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)) +{ + u64 field = (reg & field_mask) >> field_shift; + + field = sanitise_fn(field) << field_shift; + return (reg & ~field_mask) | field; +} + +#define PROPBASER_RES0_MASK \ + (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) +#define PENDBASER_RES0_MASK \ + (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ + GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) + +static u64 vgic_sanitise_pendbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, + GICR_PENDBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, + GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, + GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PENDBASER_RES0_MASK; + + return reg; +} + +static u64 vgic_sanitise_propbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, + GICR_PROPBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, + GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, + GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PROPBASER_RES0_MASK; + return reg; +} + +static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return extract_bytes(dist->propbaser, addr & 7, len); +} + +static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 old_propbaser, propbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_lpis_enabled(vcpu)) + return; + + do { + old_propbaser = READ_ONCE(dist->propbaser); + propbaser = old_propbaser; + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + } while (cmpxchg64(&dist->propbaser, old_propbaser, + propbaser) != old_propbaser); +} + +static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; + + value &= ~GICR_PENDBASER_PTZ; + + return extract_bytes(value, addr & 7, len); +} + +static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 old_pendbaser, pendbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_lpis_enabled(vcpu)) + return; + + do { + old_pendbaser = READ_ONCE(vgic_cpu->pendbaser); + pendbaser = old_pendbaser; + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, + pendbaser) != old_pendbaser); +} + +static unsigned long vgic_mmio_read_sync(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return !!atomic_read(&vcpu->arch.vgic_cpu.syncr_busy); +} + +static void vgic_set_rdist_busy(struct kvm_vcpu *vcpu, bool busy) +{ + if (busy) { + atomic_inc(&vcpu->arch.vgic_cpu.syncr_busy); + smp_mb__after_atomic(); + } else { + smp_mb__before_atomic(); + atomic_dec(&vcpu->arch.vgic_cpu.syncr_busy); + } +} + +static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_irq *irq; + + /* + * If the guest wrote only to the upper 32bit part of the + * register, drop the write on the floor, as it is only for + * vPEs (which we don't support for obvious reasons). + * + * Also discard the access if LPIs are not enabled. + */ + if ((addr & 4) || !vgic_lpis_enabled(vcpu)) + return; + + vgic_set_rdist_busy(vcpu, true); + + irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val)); + if (irq) { + vgic_its_inv_lpi(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); + } + + vgic_set_rdist_busy(vcpu, false); +} + +static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* See vgic_mmio_write_invlpi() for the early return rationale */ + if ((addr & 4) || !vgic_lpis_enabled(vcpu)) + return; + + vgic_set_rdist_busy(vcpu, true); + vgic_its_invall(vcpu); + vgic_set_rdist_busy(vcpu, false); +} + +/* + * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the + * redistributors, while SPIs are covered by registers in the distributor + * block. Trying to set private IRQs in this block gets ignored. + * We take some special care here to fix the calculation of the register + * offset. + */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .access_flags = acc, \ + .read = vgic_mmio_read_raz, \ + .write = vgic_mmio_write_wi, \ + }, { \ + .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .bits_per_irq = bpi, \ + .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +static const struct vgic_register_region vgic_v3_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR, + vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, + NULL, vgic_mmio_uaccess_write_v3_misc, + 16, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, + vgic_mmio_read_rao, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, + vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, + vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), +}; + +static const struct vgic_register_region vgic_v3_rd_registers[] = { + /* RD_base registers */ + REGISTER_DESC_WITH_LENGTH(GICR_CTLR, + vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_STATUSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IIDR, + vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, + NULL, vgic_mmio_uaccess_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_WAKER, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, + vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, + vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_INVLPIR, + vgic_mmio_read_raz, vgic_mmio_write_invlpi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_INVALLR, + vgic_mmio_read_raz, vgic_mmio_write_invall, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_SYNCR, + vgic_mmio_read_sync, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), + /* SGI_base registers */ + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, + vgic_mmio_read_group, vgic_mmio_write_group, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, + vgic_mmio_read_priority, vgic_mmio_write_priority, 32, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0, + vgic_mmio_read_config, vgic_mmio_write_config, 8, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) +{ + dev->regions = vgic_v3_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_64K; +} + +/** + * vgic_register_redist_iodev - register a single redist iodev + * @vcpu: The VCPU to which the redistributor belongs + * + * Register a KVM iodev for this VCPU's redistributor using the address + * provided. + * + * Return 0 on success, -ERRNO otherwise. + */ +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_redist_region *rdreg; + gpa_t rd_base; + int ret = 0; + + lockdep_assert_held(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); + + if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) + goto out_unlock; + + /* + * We may be creating VCPUs before having set the base address for the + * redistributor region, in which case we will come back to this + * function for all VCPUs when the base address is set. Just return + * without doing any work for now. + */ + rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); + if (!rdreg) + goto out_unlock; + + if (!vgic_v3_check_base(kvm)) { + ret = -EINVAL; + goto out_unlock; + } + + vgic_cpu->rdreg = rdreg; + vgic_cpu->rdreg_index = rdreg->free_index; + + rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; + + kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); + rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; + rd_dev->regions = vgic_v3_rd_registers; + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + rd_dev->redist_vcpu = vcpu; + + mutex_unlock(&kvm->arch.config_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, + 2 * SZ_64K, &rd_dev->dev); + if (ret) + return ret; + + /* Protected by slots_lock */ + rdreg->free_index++; + return 0; + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return ret; +} + +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + + kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); +} + +static int vgic_register_all_redist_iodevs(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long c; + int ret = 0; + + kvm_for_each_vcpu(c, vcpu, kvm) { + ret = vgic_register_redist_iodev(vcpu); + if (ret) + break; + } + + if (ret) { + /* The current c failed, so iterate over the previous ones. */ + int i; + + for (i = 0; i < c; i++) { + vcpu = kvm_get_vcpu(kvm, i); + vgic_unregister_redist_iodev(vcpu); + } + } + + return ret; +} + +/** + * vgic_v3_alloc_redist_region - Allocate a new redistributor region + * + * Performs various checks before inserting the rdist region in the list. + * Those tests depend on whether the size of the rdist region is known + * (ie. count != 0). The list is sorted by rdist region index. + * + * @kvm: kvm handle + * @index: redist region index + * @base: base of the new rdist region + * @count: number of redistributors the region is made of (0 in the old style + * single region, whose size is induced from the number of vcpus) + * + * Return 0 on success, < 0 otherwise + */ +static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, + gpa_t base, uint32_t count) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + struct list_head *rd_regions = &d->rd_regions; + int nr_vcpus = atomic_read(&kvm->online_vcpus); + size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE + : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE; + int ret; + + /* cross the end of memory ? */ + if (base + size < base) + return -EINVAL; + + if (list_empty(rd_regions)) { + if (index != 0) + return -EINVAL; + } else { + rdreg = list_last_entry(rd_regions, + struct vgic_redist_region, list); + + /* Don't mix single region and discrete redist regions */ + if (!count && rdreg->count) + return -EINVAL; + + if (!count) + return -EEXIST; + + if (index != rdreg->index + 1) + return -EINVAL; + } + + /* + * For legacy single-region redistributor regions (!count), + * check that the redistributor region does not overlap with the + * distributor's address space. + */ + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + vgic_dist_overlap(kvm, base, size)) + return -EINVAL; + + /* collision with any other rdist region? */ + if (vgic_v3_rdist_overlap(kvm, base, size)) + return -EINVAL; + + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT); + if (!rdreg) + return -ENOMEM; + + rdreg->base = VGIC_ADDR_UNDEF; + + ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size); + if (ret) + goto free; + + rdreg->base = base; + rdreg->count = count; + rdreg->free_index = 0; + rdreg->index = index; + + list_add_tail(&rdreg->list, rd_regions); + return 0; +free: + kfree(rdreg); + return ret; +} + +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg) +{ + list_del(&rdreg->list); + kfree(rdreg); +} + +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) +{ + int ret; + + mutex_lock(&kvm->arch.config_lock); + ret = vgic_v3_alloc_redist_region(kvm, index, addr, count); + mutex_unlock(&kvm->arch.config_lock); + if (ret) + return ret; + + /* + * Register iodevs for each existing VCPU. Adding more VCPUs + * afterwards will register the iodevs when needed. + */ + ret = vgic_register_all_redist_iodevs(kvm); + if (ret) { + struct vgic_redist_region *rdreg; + + mutex_lock(&kvm->arch.config_lock); + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + vgic_v3_free_redist_region(rdreg); + mutex_unlock(&kvm->arch.config_lock); + return ret; + } + + return 0; +} + +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v3_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ + iodev.regions = vgic_v3_rd_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + iodev.base_addr = 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_cpu_sysregs_attr(vcpu, attr); + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} +/* + * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI + * generation register ICC_SGI1R_EL1) with a given VCPU. + * If the VCPU's MPIDR matches, return the level0 affinity, otherwise + * return -1. + */ +static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) +{ + unsigned long affinity; + int level0; + + /* + * Split the current VCPU's MPIDR into affinity level 0 and the + * rest as this is what we have to compare against. + */ + affinity = kvm_vcpu_get_mpidr_aff(vcpu); + level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); + affinity &= ~MPIDR_LEVEL_MASK; + + /* bail out if the upper three levels don't match */ + if (sgi_aff != affinity) + return -1; + + /* Is this VCPU's bit set in the mask ? */ + if (!(sgi_cpu_mask & BIT(level0))) + return -1; + + return level0; +} + +/* + * The ICC_SGI* registers encode the affinity differently from the MPIDR, + * so provide a wrapper to use the existing defines to isolate a certain + * affinity level. + */ +#define SGI_AFFINITY_LEVEL(reg, level) \ + ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ + >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/** + * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs + * @vcpu: The VCPU requesting a SGI + * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU + * @allow_group1: Does the sysreg access allow generation of G1 SGIs + * + * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. + * This will trap in sys_regs.c and call this function. + * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the + * target processors as well as a bitmask of 16 Aff0 CPUs. + * If the interrupt routing mode bit is not set, we iterate over all VCPUs to + * check for matching ones. If this bit is set, we signal all, but not the + * calling VCPU. + */ +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *c_vcpu; + u16 target_cpus; + u64 mpidr; + int sgi; + int vcpu_id = vcpu->vcpu_id; + bool broadcast; + unsigned long c, flags; + + sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; + broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); + target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + + /* + * We iterate over all VCPUs to find the MPIDRs matching the request. + * If we have handled one CPU, we clear its bit to detect early + * if we are already finished. This avoids iterating through all + * VCPUs when most of the times we just signal a single VCPU. + */ + kvm_for_each_vcpu(c, c_vcpu, kvm) { + struct vgic_irq *irq; + + /* Exit early if we have dealt with all requested CPUs */ + if (!broadcast && target_cpus == 0) + break; + + /* Don't signal the calling VCPU */ + if (broadcast && c == vcpu_id) + continue; + + if (!broadcast) { + int level0; + + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); + if (level0 == -1) + continue; + + /* remove this matching VCPU from the mask */ + target_cpus &= ~BIT(level0); + } + + irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v3_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers), + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device rd_dev = { + .regions = vgic_v3_rd_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers), + }; + + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); +} + +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u32 *val) +{ + if (intid % 32) + return -EINVAL; + + if (is_write) + vgic_write_irq_line_level_info(vcpu, intid, *val); + else + *val = vgic_read_irq_line_level_info(vcpu, intid); + + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c new file mode 100644 index 0000000000..ff558c05e9 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -0,0 +1,1118 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC MMIO handling functions + */ + +#include <linux/bitops.h> +#include <linux/bsearch.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/iodev.h> +#include <kvm/arm_arch_timer.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return 0; +} + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return -1UL; +} + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ + return 0; +} + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->group) + value |= BIT(i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static void vgic_update_vsgi(struct vgic_irq *irq) +{ + WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group)); +} + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->group = !!(val & BIT(i)); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } else { + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* + * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value + * of the enabled bit, so there is only one function for both here. + */ +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->enabled) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (!irq->enabled) { + struct irq_data *data; + + irq->enabled = true; + data = &irq_to_desc(irq->host_irq)->irq_data; + while (irqd_irq_disabled(data)) + enable_irq(irq->host_irq); + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } else if (vgic_irq_is_mapped_level(irq)) { + bool was_high = irq->line_level; + + /* + * We need to update the state of the interrupt because + * the guest might have changed the state of the device + * while the interrupt was disabled at the VGIC level. + */ + irq->line_level = vgic_get_phys_line_level(irq); + /* + * Deactivate the physical interrupt so the GIC will let + * us know when it is asserted again. + */ + if (!irq->active && was_high && !irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) + disable_irq_nosync(irq->host_irq); + + irq->enabled = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +static unsigned long __read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + bool is_user) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + unsigned long flags; + bool val; + + /* + * When used from userspace with a GICv3 model: + * + * Pending state of interrupt is latched in pending_latch + * variable. Userspace will save and restore pending state + * and line_level separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. + */ + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + val = false; + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &val); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { + val = vgic_get_phys_line_level(irq); + } else { + switch (vcpu->kvm->arch.vgic.vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + if (is_user) { + val = irq->pending_latch; + break; + } + fallthrough; + default: + val = irq_is_pending(irq); + break; + } + } + + value |= ((u32)val << i); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, false); +} + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __read_pending(vcpu, addr, len, true); +} + +static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + return (vgic_irq_is_sgi(irq->intid) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); +} + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ISPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + irq->pending_latch = true; + if (irq->hw) + vgic_irq_set_phys_active(irq, true); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + irq->pending_latch = false; + + /* + * We don't want the guest to effectively mask the physical + * interrupt by doing a write to SPENDR followed by a write to + * CPENDR for HW interrupts, so we clear the active state on + * the physical side if the virtual interrupt is not active. + * This may lead to taking an additional interrupt on the + * host, but that should not be a problem as the worst that + * can happen is an additional vgic injection. We also clear + * the pending state to maintain proper semantics for edge HW + * interrupts. + */ + vgic_irq_set_phys_pending(irq, false); + if (!irq->active) + vgic_irq_set_phys_active(irq, false); +} + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ICPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to clear its pending bit */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + false); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + if (irq->hw) + vgic_hw_irq_cpending(vcpu, irq); + else + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* + * If we are fiddling with an IRQ's active state, we have to make sure the IRQ + * is not queued on some running VCPU's LRs, because then the change to the + * active state can be overwritten when the VCPU's state is synced coming back + * from the guest. + * + * For shared interrupts as well as GICv3 private interrupts accessed from the + * non-owning CPU, we have to stop all the VCPUs because interrupts can be + * migrated while we don't hold the IRQ locks and we don't want to be chasing + * moving targets. + * + * For GICv2 private interrupts we don't have to do anything because + * userspace accesses to the VGIC state already require all VCPUs to be + * stopped, and only the VCPU itself can modify its private interrupts + * active state, which guarantees that the VCPU is not running. + */ +static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) +{ + if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 && + vcpu != kvm_get_running_vcpu()) || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_halt_guest(vcpu->kvm); +} + +/* See vgic_access_active_prepare */ +static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid) +{ + if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 && + vcpu != kvm_get_running_vcpu()) || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_resume_guest(vcpu->kvm); +} + +static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Even for HW interrupts, don't evaluate the HW state as + * all the guest is interested in is the virtual state. + */ + if (irq->active) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 val; + + mutex_lock(&vcpu->kvm->arch.config_lock); + vgic_access_active_prepare(vcpu, intid); + + val = __vgic_mmio_read_active(vcpu, addr, len); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->arch.config_lock); + + return val; +} + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __vgic_mmio_read_active(vcpu, addr, len); +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active, bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->active = active; + vgic_irq_set_phys_active(irq, active); +} + +static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active) +{ + unsigned long flags; + struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu(); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && !vgic_irq_is_sgi(irq->intid)) { + vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + } else if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* + * GICv4.1 VSGI feature doesn't track an active state, + * so let's not kid ourselves, there is nothing we can + * do here. + */ + irq->active = false; + } else { + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u8 active_source; + + irq->active = active; + + /* + * The GICv2 architecture indicates that the source CPUID for + * an SGI should be provided during an EOI which implies that + * the active state is stored somewhere, but at the same time + * this state is not architecturally exposed anywhere and we + * have no way of knowing the right source. + * + * This may lead to a VCPU not being able to receive + * additional instances of a particular SGI after migration + * for a GICv2 VM on some GIC implementations. Oh well. + */ + active_source = (requester_vcpu) ? requester_vcpu->vcpu_id : 0; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2 && + active && vgic_irq_is_sgi(irq->intid)) + irq->active_source = active_source; + } + + if (irq->active) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +} + +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, false); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->arch.config_lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->arch.config_lock); +} + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); + return 0; +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, true); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->arch.config_lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->arch.config_lock); +} + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); + return 0; +} + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->priority << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +/* + * We currently don't handle changing the priority of an interrupt that + * is already pending on a VCPU. If there is a need for this, we would + * need to make this VCPU exit and re-evaluate the priorities, potentially + * leading to this interrupt getting presented now to the guest (if it has + * been masked by the priority mask before). + */ +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* Narrow the priority range to what we actually support */ + irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + u32 value = 0; + int i; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->config == VGIC_CONFIG_EDGE) + value |= (2U << (i * 2)); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + int i; + unsigned long flags; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq; + + /* + * The configuration cannot be changed for SGIs in general, + * for PPIs this is IMPLEMENTATION DEFINED. The arch timer + * code relies on PPIs being level triggered, so we also + * make them read-only here. + */ + if (intid + i < VGIC_NR_PRIVATE_IRQS) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (test_bit(i * 2 + 1, &val)) + irq->config = VGIC_CONFIG_EDGE; + else + irq->config = VGIC_CONFIG_LEVEL; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +{ + int i; + u32 val = 0; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) + val |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u32 val) +{ + int i; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + unsigned long flags; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + bool new_level; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Line level is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before line level. + */ + new_level = !!(val & (1U << i)); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->line_level = new_level; + if (new_level) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +static int match_region(const void *key, const void *elt) +{ + const unsigned int offset = (unsigned long)key; + const struct vgic_register_region *region = elt; + + if (offset < region->reg_offset) + return -1; + + if (offset >= region->reg_offset + region->len) + return 1; + + return 0; +} + +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset) +{ + return bsearch((void *)(uintptr_t)offset, regions, nr_regions, + sizeof(regions[0]), match_region); +} + +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + +/* + * kvm_mmio_read_buf() returns a value in a format where it can be converted + * to a byte array and be directly observed as the guest wanted it to appear + * in memory if it had done the store itself, which is LE for the GIC, as the + * guest knows the GIC is always LE. + * + * We convert this value to the CPUs native format to deal with it as a data + * value. + */ +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) +{ + unsigned long data = kvm_mmio_read_buf(val, len); + + switch (len) { + case 1: + return data; + case 2: + return le16_to_cpu(data); + case 4: + return le32_to_cpu(data); + default: + return le64_to_cpu(data); + } +} + +/* + * kvm_mmio_write_buf() expects a value in a format such that if converted to + * a byte array it is observed as the guest would see it if it could perform + * the load directly. Since the GIC is LE, and the guest knows this, the + * guest expects a value in little endian format. + * + * We convert the data value from the CPUs native format to LE so that the + * value is returned in the proper format. + */ +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data) +{ + switch (len) { + case 1: + break; + case 2: + data = cpu_to_le16(data); + break; + case 4: + data = cpu_to_le32(data); + break; + default: + data = cpu_to_le64(data); + } + + kvm_mmio_write_buf(buf, len, data); +} + +static +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) +{ + return container_of(dev, struct vgic_io_device, dev); +} + +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, + gpa_t addr, int len) +{ + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? */ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } + + return false; +} + +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len) +{ + const struct vgic_register_region *region; + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region || !check_region(vcpu->kvm, region, addr, len)) + return NULL; + + return region; +} + +static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, u32 *val) +{ + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) { + *val = 0; + return 0; + } + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_read) + *val = region->uaccess_read(r_vcpu, addr, sizeof(u32)); + else + *val = region->read(r_vcpu, addr, sizeof(u32)); + + return 0; +} + +static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, const u32 *val) +{ + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) + return 0; + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_write) + return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); + + region->write(r_vcpu, addr, sizeof(u32), *val); + return 0; +} + +/* + * Userland access to VGIC registers. + */ +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val) +{ + if (is_write) + return vgic_uaccess_write(vcpu, dev, offset, val); + else + return vgic_uaccess_read(vcpu, dev, offset, val); +} + +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = 0; + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) { + memset(val, 0, len); + return 0; + } + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + data = region->read(vcpu, addr, len); + break; + case IODEV_DIST: + data = region->read(vcpu, addr, len); + break; + case IODEV_REDIST: + data = region->read(iodev->redist_vcpu, addr, len); + break; + case IODEV_ITS: + data = region->its_read(vcpu->kvm, iodev->its, addr, len); + break; + } + + vgic_data_host_to_mmio_bus(val, len, data); + return 0; +} + +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = vgic_data_mmio_bus_to_host(val, len); + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) + return 0; + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + region->write(vcpu, addr, len, data); + break; + case IODEV_DIST: + region->write(vcpu, addr, len, data); + break; + case IODEV_REDIST: + region->write(iodev->redist_vcpu, addr, len, data); + break; + case IODEV_ITS: + region->its_write(vcpu->kvm, iodev->its, addr, len, data); + break; + } + + return 0; +} + +const struct kvm_io_device_ops kvm_io_gic_ops = { + .read = dispatch_mmio_read, + .write = dispatch_mmio_write, +}; + +int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type type) +{ + struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; + unsigned int len; + + switch (type) { + case VGIC_V2: + len = vgic_v2_init_dist_iodev(io_device); + break; + case VGIC_V3: + len = vgic_v3_init_dist_iodev(io_device); + break; + default: + BUG_ON(1); + } + + io_device->base_addr = dist_base_address; + io_device->iodev_type = IODEV_DIST; + io_device->redist_vcpu = NULL; + + return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h new file mode 100644 index 0000000000..5b490a4dfa --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ +#ifndef __KVM_ARM_VGIC_MMIO_H__ +#define __KVM_ARM_VGIC_MMIO_H__ + +struct vgic_register_region { + unsigned int reg_offset; + unsigned int len; + unsigned int bits_per_irq; + unsigned int access_flags; + union { + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len); + }; + union { + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + void (*its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; + unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + union { + int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; +}; + +extern const struct kvm_io_device_ops kvm_io_gic_ops; + +#define VGIC_ACCESS_8bit 1 +#define VGIC_ACCESS_32bit 2 +#define VGIC_ACCESS_64bit 4 + +/* + * Generate a mask that covers the number of bytes required to address + * up to 1024 interrupts, each represented by <bits> bits. This assumes + * that <bits> is a power of two. + */ +#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) + +/* + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 the get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of <bits>. + * This assumes that <bits> is a power of two. + */ +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ + 8 >> ilog2(bits)) + +/* + * Some VGIC registers store per-IRQ information, with a different number + * of bits per IRQ. For those registers this macro is used. + * The _WITH_LENGTH version instantiates registers with a fixed length + * and is mutually exclusive with the _PER_IRQ version. + */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = bpi * 1024 / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + } + +#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = urd, \ + .uaccess_write = uwr, \ + } + +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); + +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data); + +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num); + +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val); + +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u32 val); + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); + +u64 vgic_sanitise_outer_cacheability(u64 reg); +u64 vgic_sanitise_inner_cacheability(u64 reg); +u64 vgic_sanitise_shareability(u64 reg); +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)); + +/* Find the proper register handler entry given a certain address offset */ +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset); + +#endif diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c new file mode 100644 index 0000000000..7e9cdb78f7 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" + +static inline void vgic_v2_write_lr(int lr, u32 val) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); +} + +void vgic_v2_init_lrs(void) +{ + int i; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) + vgic_v2_write_lr(i, 0); +} + +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + + cpuif->vgic_hcr |= GICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u32 lr_val) +{ + return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) && + !(lr_val & GICH_LR_HW); +} + +/* + * transfer the content of the LRs back into the corresponding ap_list: + * - active bit is transferred as is + * - pending bit is + * - transferred as is in case of edge sensitive IRQs + * - set to the line-level (resample time) for level sensitive IRQs + */ +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~GICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { + u32 val = cpuif->vgic_lr[lr]; + u32 cpuid, intid = val & GICH_LR_VIRTUALID; + struct vgic_irq *irq; + bool deactivated; + + /* Extract the source vCPU id from the LR */ + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + cpuid &= 7; + + /* Notify fds when the guest EOI'ed a level-triggered SPI */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); + irq->active = !!(val & GICH_LR_ACTIVE_BIT); + + if (irq->active && vgic_irq_is_sgi(intid)) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & GICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (vgic_irq_is_sgi(intid)) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; + + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* + * Populates the particular LR with the state of a given IRQ: + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq + * - for a level sensitive IRQ the pending state value is unchanged; + * it is dictated directly by the input level + * + * If @irq describes an SGI with multiple sources, we choose the + * lowest-numbered source VCPU and clear that bit in the source bitmap. + * + * The irq_lock must be held by the caller. + */ +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) { + val |= GICH_LR_ACTIVE_BIT; + if (vgic_irq_is_sgi(irq->intid)) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= GICH_LR_EOI; + } + } + + if (irq->group) + val |= GICH_LR_GROUP1; + + if (irq->hw && !vgic_irq_needs_resampling(irq)) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= GICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid)) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= GICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v2_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) + irq->line_level = false; + + /* The GICv2 LR only holds five bits of priority. */ + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; +} + +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; +} + +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) & + GICH_VMCR_ENABLE_GRP0_MASK; + vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) & + GICH_VMCR_ENABLE_GRP1_MASK; + vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) & + GICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) & + GICH_VMCR_FIQ_EN_MASK; + vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) & + GICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) & + GICH_VMCR_EOI_MODE_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & + GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & + GICH_VMCR_BINPOINT_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >> + GICH_VMCR_ENABLE_GRP0_SHIFT; + vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >> + GICH_VMCR_ENABLE_GRP1_SHIFT; + vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >> + GICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >> + GICH_VMCR_FIQ_EN_SHIFT; + vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >> + GICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >> + GICH_VMCR_EOI_MODE_SHIFT; + + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> + GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> + GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; +} + +void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +/* check for overlapping regions and for regions crossing the end of memory */ +static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) +{ + if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) + return false; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) + return false; + + if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) + return true; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) + return true; + + return false; +} + +int vgic_v2_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret = 0; + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { + kvm_debug("Need to set vgic cpu and dist addresses first\n"); + return -ENXIO; + } + + if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { + kvm_debug("VGIC CPU and dist frames overlap\n"); + return -EINVAL; + } + + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); + if (ret) { + kvm_err("Unable to initialize VGIC dynamic data structures\n"); + return ret; + } + + if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, + kvm_vgic_global_state.vcpu_base, + KVM_VGIC_V2_CPU_SIZE, true); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + return ret; + } + } + + return 0; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); + +/** + * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv2 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v2_probe(const struct gic_kvm_info *info) +{ + int ret; + u32 vtr; + + if (is_protected_kvm_enabled()) { + kvm_err("GICv2 not supported in protected mode\n"); + return -ENXIO; + } + + if (!info->vctrl.start) { + kvm_err("GICH not present in the firmware table\n"); + return -ENXIO; + } + + if (!PAGE_ALIGNED(info->vcpu.start) || + !PAGE_ALIGNED(resource_size(&info->vcpu))) { + kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); + + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); + if (ret) { + kvm_err("Cannot map GICV into hyp\n"); + goto out; + } + + static_branch_enable(&vgic_v2_cpuif_trap); + } + + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out; + } + + vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); + kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + goto out; + } + + kvm_vgic_global_state.can_emulate_gicv2 = true; + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.type = VGIC_V2; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; + + kvm_debug("vgic-v2@%llx\n", info->vctrl.start); + + return 0; +out: + if (kvm_vgic_global_state.vctrl_base) + iounmap(kvm_vgic_global_state.vctrl_base); + if (kvm_vgic_global_state.vcpu_base_va) + iounmap(kvm_vgic_global_state.vcpu_base_va); + + return ret; +} + +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = cpu_if->used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + + if (!base) + return; + + if (used_lrs) { + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = cpu_if->used_lrs; + int i; + + if (!base) + return; + + if (used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + +void vgic_v2_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); +} + +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c new file mode 100644 index 0000000000..3dfc8b84e0 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -0,0 +1,760 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kstrtox.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_asm.h> + +#include "vgic.h" + +static bool group0_trap; +static bool group1_trap; +static bool common_trap; +static bool dir_trap; +static bool gicv4_enable; + +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u64 lr_val) +{ + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~ICH_HCR_UIE; + + for (lr = 0; lr < cpuif->used_lrs; lr++) { + u64 val = cpuif->vgic_lr[lr]; + u32 intid, cpuid; + struct vgic_irq *irq; + bool is_v2_sgi = false; + bool deactivated; + + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { + intid = val & ICH_LR_VIRTUAL_ID_MASK; + } else { + intid = val & GICH_LR_VIRTUALID; + is_v2_sgi = vgic_irq_is_sgi(intid); + } + + /* Notify fds when the guest EOI'ed a level-triggered IRQ */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + continue; + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); + irq->active = !!(val & ICH_LR_ACTIVE_BIT); + + if (irq->active && is_v2_sgi) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & ICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (is_v2_sgi) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; + + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* Requires the irq to be locked already */ +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u64 val = irq->intid; + bool allow_pending = true, is_v2_sgi; + + is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2); + + if (irq->active) { + val |= ICH_LR_ACTIVE_BIT; + if (is_v2_sgi) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= ICH_LR_EOI; + } + } + + if (irq->hw && !vgic_irq_needs_resampling(irq)) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= ICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= ICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + + if (irq->group) + val |= ICH_LR_GROUP; + + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; +} + +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; +} + +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & + ICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & + ICH_VMCR_FIQ_EN_MASK; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcr = ICH_VMCR_FIQ_EN_MASK; + } + + vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> + ICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> + ICH_VMCR_FIQ_EN_SHIFT; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcrp->fiqen = 1; + vmcrp->ackctl = 0; + } + + vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; +} + +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + +void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vgic_v3->vgic_vmcr = 0; + + /* + * If we are emulating a GICv3, we do it in an non-GICv2-compatible + * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. + * This goes with the spec allowing the value to be RAO/WI. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { + vgic_v3->vgic_sre = 0; + } + + vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_ID_BITS_MASK) >> + ICH_VTR_ID_BITS_SHIFT; + vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_PRI_BITS_MASK) >> + ICH_VTR_PRI_BITS_SHIFT) + 1; + + /* Get the show on the road... */ + vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; + if (dir_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TDIR; +} + +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) +{ + struct kvm_vcpu *vcpu; + int byte_offset, bit_nr; + gpa_t pendbase, ptr; + bool status; + u8 val; + int ret; + unsigned long flags; + +retry: + vcpu = irq->target_vcpu; + if (!vcpu) + return 0; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + + status = val & (1 << bit_nr); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->target_vcpu != vcpu) { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + goto retry; + } + irq->pending_latch = status; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + if (status) { + /* clear consumed data */ + val &= ~(1 << bit_nr); + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/* + * The deactivation of the doorbell interrupt will trigger the + * unmapping of the associated vPE. + */ +static void unmap_all_vpes(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int i; + + for (i = 0; i < dist->its_vm.nr_vpes; i++) + free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i)); +} + +static void map_all_vpes(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int i; + + for (i = 0; i < dist->its_vm.nr_vpes; i++) + WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i), + dist->its_vm.vpes[i]->irq)); +} + +/** + * vgic_v3_save_pending_tables - Save the pending tables into guest RAM + * kvm lock and all vcpu lock must be held + */ +int vgic_v3_save_pending_tables(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; + bool vlpi_avail = false; + int ret = 0; + u8 val; + + if (unlikely(!vgic_initialized(kvm))) + return -ENXIO; + + /* + * A preparation for getting any VLPI states. + * The above vgic initialized check also ensures that the allocation + * and enabling of the doorbells have already been done. + */ + if (kvm_vgic_global_state.has_gicv4_1) { + unmap_all_vpes(kvm); + vlpi_avail = true; + } + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + int byte_offset, bit_nr; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr; + bool is_pending; + bool stored; + + vcpu = irq->target_vcpu; + if (!vcpu) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + if (ptr != last_ptr) { + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + goto out; + last_ptr = ptr; + } + + stored = val & (1U << bit_nr); + + is_pending = irq->pending_latch; + + if (irq->hw && vlpi_avail) + vgic_v4_get_vlpi_state(irq, &is_pending); + + if (stored == is_pending) + continue; + + if (is_pending) + val |= 1 << bit_nr; + else + val &= ~(1 << bit_nr); + + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + goto out; + } + +out: + if (vlpi_avail) + map_all_vpes(kvm); + + return ret; +} + +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + +/* + * Check for overlapping regions and for regions crossing the end of memory + * for base addresses which have already been set. + */ +bool vgic_v3_check_base(struct kvm *kvm) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) + return false; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + size_t sz = vgic_v3_rd_region_size(kvm, rdreg); + + if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF, + rdreg->base, SZ_64K, sz)) + return false; + } + + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) + return true; + + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} + +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor regions maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in the index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + +int vgic_v3_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + unsigned long c; + + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %ld redistributor base not set\n", c); + return -ENXIO; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { + kvm_debug("Need to set vgic distributor addresses first\n"); + return -ENXIO; + } + + if (!vgic_v3_check_base(kvm)) { + kvm_debug("VGIC redist and dist frames overlap\n"); + return -EINVAL; + } + + /* + * For a VGICv3 we require the userland to explicitly initialize + * the VGIC before we need to use it. + */ + if (!vgic_initialized(kvm)) { + return -EBUSY; + } + + if (kvm_vgic_global_state.has_gicv4_1) + vgic_v4_configure_vsgis(kvm); + + return 0; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + +static int __init early_group0_trap_cfg(char *buf) +{ + return kstrtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + +static int __init early_group1_trap_cfg(char *buf) +{ + return kstrtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + +static int __init early_common_trap_cfg(char *buf) +{ + return kstrtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + +static int __init early_gicv4_enable(char *buf) +{ + return kstrtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + +/** + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv3 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v3_probe(const struct gic_kvm_info *info) +{ + u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + bool has_v2; + int ret; + + has_v2 = ich_vtr_el2 >> 63; + ich_vtr_el2 = (u32)ich_vtr_el2; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; + + /* GICv4 support? */ + if (info->has_v4) { + kvm_vgic_global_state.has_gicv4 = gicv4_enable; + kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable; + kvm_info("GICv4%s support %sabled\n", + kvm_vgic_global_state.has_gicv4_1 ? ".1" : "", + gicv4_enable ? "en" : "dis"); + } + + kvm_vgic_global_state.vcpu_base = 0; + + if (!info->vcpu.start) { + kvm_info("GICv3: no GICV resource entry\n"); + } else if (!has_v2) { + pr_warn(FW_BUG "CPU interface incapable of MMIO access\n"); + } else if (!PAGE_ALIGNED(info->vcpu.start)) { + pr_warn("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)info->vcpu.start); + } else if (kvm_get_mode() != KVM_MODE_PROTECTED) { + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.can_emulate_gicv2 = true; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } + kvm_info("vgic-v2@%llx\n", info->vcpu.start); + } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + + if (kvm_vgic_global_state.vcpu_base == 0) + kvm_info("disabling GICv2 emulation\n"); + + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } + + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; + group0_trap = true; + group1_trap = true; + if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + dir_trap = true; + else + common_trap = true; + } + + if (group0_trap || group1_trap || common_trap | dir_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", + group0_trap ? "G0" : "", + group1_trap ? "G1" : "", + common_trap ? "C" : "", + dir_trap ? "D" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } + + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.type = VGIC_V3; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + return 0; +} + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (likely(cpu_if->vgic_sre)) + kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if); + + if (has_vhe()) + __vgic_v3_activate_traps(cpu_if); + + WARN_ON(vgic_v4_load(vcpu)); +} + +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + WARN_ON(vgic_v4_put(vcpu)); + + vgic_v3_vmcr_sync(vcpu); + + kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); + + if (has_vhe()) + __vgic_v3_deactivate_traps(cpu_if); +} diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c new file mode 100644 index 0000000000..339a55194b --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kvm_host.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include "vgic.h" + +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in a irq_set_irqchip_state(), which + * generates an INT on the corresponding VLPI. + * - The CLEAR command results in a irq_set_irqchip_state(), which + * generates an CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + * kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + * the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + * the handling of MOVI). MOVALL is horrible. + * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue a irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_make_vpe_resident(). + * You must be in a non-preemptible context. On exit, a call to + * its_make_vpe_non_resident() tells the redistributor that we're done + * with the vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + +#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ + struct kvm_vcpu *vcpu = info; + + /* We got the message, no need to fire again */ + if (!kvm_vgic_global_state.has_gicv4_1 && + !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) + disable_irq_nosync(irq); + + /* + * The v4.1 doorbell can fire concurrently with the vPE being + * made non-resident. Ensure we only update pending_last + * *after* the non-residency sequence has completed. + */ + raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock); + + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return IRQ_HANDLED; +} + +static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq) +{ + vpe->sgi_config[irq->intid].enabled = irq->enabled; + vpe->sgi_config[irq->intid].group = irq->group; + vpe->sgi_config[irq->intid].priority = irq->priority; +} + +static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int i; + + /* + * With GICv4.1, every virtual SGI can be directly injected. So + * let's pretend that they are HW interrupts, tied to a host + * IRQ. The SGI code will do its magic. + */ + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw) + goto unlock; + + irq->hw = true; + irq->host_irq = irq_find_mapping(vpe->sgi_domain, i); + + /* Transfer the full irq state to the vPE */ + vgic_v4_sync_sgi_config(vpe, irq); + desc = irq_to_desc(irq->host_irq); + ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc), + false); + if (!WARN_ON(ret)) { + /* Transfer pending state */ + ret = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_ON(ret); + irq->pending_latch = false; + } + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!irq->hw) + goto unlock; + + irq->hw = false; + ret = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &irq->pending_latch); + WARN_ON(ret); + + desc = irq_to_desc(irq->host_irq); + irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_v4_configure_vsgis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + unsigned long i; + + lockdep_assert_held(&kvm->arch.config_lock); + + kvm_arm_halt_guest(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (dist->nassgireq) + vgic_v4_enable_vsgis(vcpu); + else + vgic_v4_disable_vsgis(vcpu); + } + + kvm_arm_resume_guest(kvm); +} + +/* + * Must be called with GICv4.1 and the vPE unmapped, which + * indicates the invalidation of any VPT caches associated + * with the vPE, thus we can get the VLPI state by peeking + * at the VPT. + */ +void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val) +{ + struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int mask = BIT(irq->intid % BITS_PER_BYTE); + void *va; + u8 *ptr; + + va = page_address(vpe->vpt_page); + ptr = va + irq->intid / BITS_PER_BYTE; + + *val = !!(*ptr & mask); +} + +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) +{ + return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); +} + +/** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm: Pointer to the VM being initialized + * + * We may be called each time a vITS is created, or when the + * vgic is initialized. In both cases, the number of vcpus + * should now be fixed. + */ +int vgic_v4_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int nr_vcpus, ret; + unsigned long i; + + lockdep_assert_held(&kvm->arch.config_lock); + + if (!kvm_vgic_global_state.has_gicv4) + return 0; /* Nothing to see here... move along. */ + + if (dist->its_vm.vpes) + return 0; + + nr_vcpus = atomic_read(&kvm->online_vcpus); + + dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), + GFP_KERNEL_ACCOUNT); + if (!dist->its_vm.vpes) + return -ENOMEM; + + dist->its_vm.nr_vpes = nr_vcpus; + + kvm_for_each_vcpu(i, vcpu, kvm) + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_alloc_vcpu_irqs(&dist->its_vm); + if (ret < 0) { + kvm_err("VPE IRQ allocation failure\n"); + kfree(dist->its_vm.vpes); + dist->its_vm.nr_vpes = 0; + dist->its_vm.vpes = NULL; + return ret; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + int irq = dist->its_vm.vpes[i]->irq; + unsigned long irq_flags = DB_IRQ_FLAGS; + + /* + * Don't automatically enable the doorbell, as we're + * flipping it back and forth when the vcpu gets + * blocked. Also disable the lazy disabling, as the + * doorbell could kick us out of the guest too + * early... + * + * On GICv4.1, the doorbell is managed in HW and must + * be left enabled. + */ + if (kvm_vgic_global_state.has_gicv4_1) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + + ret = vgic_v4_request_vpe_irq(vcpu, irq); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* + * Trick: adjust the number of vpes so we know + * how many to nuke on teardown... + */ + dist->its_vm.nr_vpes = i; + break; + } + } + + if (ret) + vgic_v4_teardown(kvm); + + return ret; +} + +/** + * vgic_v4_teardown - Free the GICv4 data structures + * @kvm: Pointer to the VM being destroyed + */ +void vgic_v4_teardown(struct kvm *kvm) +{ + struct its_vm *its_vm = &kvm->arch.vgic.its_vm; + int i; + + lockdep_assert_held(&kvm->arch.config_lock); + + if (!its_vm->vpes) + return; + + for (i = 0; i < its_vm->nr_vpes; i++) { + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); + int irq = its_vm->vpes[i]->irq; + + irq_clear_status_flags(irq, DB_IRQ_FLAGS); + free_irq(irq, vcpu); + } + + its_free_vcpu_irqs(its_vm); + kfree(its_vm->vpes); + its_vm->nr_vpes = 0; + its_vm->vpes = NULL; +} + +int vgic_v4_put(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) + return 0; + + return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI)); +} + +int vgic_v4_load(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int err; + + if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) + return 0; + + if (vcpu_get_flag(vcpu, IN_WFI)) + return 0; + + /* + * Before making the VPE resident, make sure the redistributor + * corresponding to our current CPU expects us here. See the + * doc in drivers/irqchip/irq-gic-v4.c to understand how this + * turns into a VMOVP command at the ITS level. + */ + err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id())); + if (err) + return err; + + err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled); + if (err) + return err; + + /* + * Now that the VPE is resident, let's get rid of a potential + * doorbell interrupt that would still be pending. This is a + * GICv4.0 only "feature"... + */ + if (!kvm_vgic_global_state.has_gicv4_1) + err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false); + + return err; +} + +void vgic_v4_commit(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + /* + * No need to wait for the vPE to be ready across a shallow guest + * exit, as only a vcpu_put will invalidate it. + */ + if (!vpe->ready) + its_commit_vpe(vpe); +} + +static struct vgic_its *vgic_get_its(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct kvm_msi msi = (struct kvm_msi) { + .address_lo = irq_entry->msi.address_lo, + .address_hi = irq_entry->msi.address_hi, + .data = irq_entry->msi.data, + .flags = irq_entry->msi.flags, + .devid = irq_entry->msi.devid, + }; + + return vgic_msi_to_its(kvm, &msi); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + struct its_vlpi_map map; + unsigned long flags; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + /* Perform the actual DevID/EventID -> LPI translation. */ + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + /* + * Emit the mapping request. If it fails, the ITS probably + * isn't v4 compatible, so let's silently bail out. Holding + * the ITS lock should ensure that nothing can modify the + * target vcpu. + */ + map = (struct its_vlpi_map) { + .vm = &kvm->arch.vgic.its_vm, + .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, + .vintid = irq->intid, + .properties = ((irq->priority & 0xfc) | + (irq->enabled ? LPI_PROP_ENABLED : 0) | + LPI_PROP_GROUP1), + .db_enabled = true, + }; + + ret = its_map_vlpi(virq, &map); + if (ret) + goto out; + + irq->hw = true; + irq->host_irq = virq; + atomic_inc(&map.vpe->vlpi_count); + + /* Transfer pending state */ + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->pending_latch) { + ret = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq); + + /* + * Clear pending_latch and communicate this state + * change via vgic_queue_irq_unlock. + */ + irq->pending_latch = false; + vgic_queue_irq_unlock(kvm, irq, flags); + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + +out: + mutex_unlock(&its->its_lock); + return ret; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + WARN_ON(!(irq->hw && irq->host_irq == virq)); + if (irq->hw) { + atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); + irq->hw = false; + ret = its_unmap_vlpi(virq); + } + +out: + mutex_unlock(&its->its_lock); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c new file mode 100644 index 0000000000..8be4c1ebde --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.c @@ -0,0 +1,1078 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/list_sort.h> +#include <linux/nospec.h> + +#include <asm/kvm_hyp.h> + +#include "vgic.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, +}; + +/* + * Locking order is always: + * kvm->lock (mutex) + * vcpu->mutex (mutex) + * kvm->arch.config_lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled + * + * As the ap_list_lock might be taken from the timer interrupt handler, + * we have to disable IRQs before taking this lock and everything lower + * than it. + * + * If you need to take multiple locks, always take the upper lock first, + * then the lower ones, e.g. first take the its_lock, then the irq_lock. + * If you are already holding a lock and need to take a higher one, you + * have to drop the lower ranking lock first and re-acquire it after having + * taken the upper one. + * + * When taking more than one ap_list_lock at the same time, always take the + * lowest numbered VCPU's ap_list_lock first, so: + * vcpuX->vcpu_id < vcpuY->vcpu_id: + * raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); + * raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * + * Since the VGIC must support injecting virtual interrupts from ISRs, we have + * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer + * spinlocks for any lock that may be taken while injecting an interrupt. + */ + +/* + * Iterate over the VM's list of mapped LPIs to find the one with a + * matching interrupt ID and return a reference to the IRQ structure. + */ +static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = NULL; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (irq->intid != intid) + continue; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() later once it's finished with the IRQ. + */ + vgic_get_irq_kref(irq); + goto out_unlock; + } + irq = NULL; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +/* + * This looks up the virtual interrupt ID to get the corresponding + * struct vgic_irq. It also increases the refcount, so any caller is expected + * to call vgic_put_irq() once it's finished with this IRQ. + */ +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid) +{ + /* SGIs and PPIs */ + if (intid <= VGIC_MAX_PRIVATE) { + intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + /* SPIs */ + if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); + return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; + } + + /* LPIs */ + if (intid >= VGIC_MIN_LPI) + return vgic_get_lpi(kvm, intid); + + return NULL; +} + +/* + * We can't do anything in here, because we lack the kvm pointer to + * lock and remove the item from the lpi_list. So we keep this function + * empty and use the return value of kref_put() to trigger the freeing. + */ +static void vgic_irq_release(struct kref *ref) +{ +} + +/* + * Drop the refcount on the LPI. Must be called with lpi_list_lock held. + */ +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + list_del(&irq->lpi_list); + dist->lpi_list_count--; + + kfree(irq); +} + +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; + + if (irq->intid < VGIC_MIN_LPI) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + __vgic_put_lpi_locked(kvm, irq); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + unsigned long flags; + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + if (irq->intid >= VGIC_MIN_LPI) { + raw_spin_lock(&irq->irq_lock); + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); +} + +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) +{ + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + pending)); +} + +bool vgic_get_phys_line_level(struct vgic_irq *irq) +{ + bool line_level; + + BUG_ON(!irq->hw); + + if (irq->ops && irq->ops->get_input_level) + return irq->ops->get_input_level(irq->intid); + + WARN_ON(irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &line_level)); + return line_level; +} + +/* Set/Clear the physical active state */ +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) +{ + + BUG_ON(!irq->hw); + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_ACTIVE, + active)); +} + +/** + * kvm_vgic_target_oracle - compute the target vcpu for an irq + * + * @irq: The irq to route. Must be already locked. + * + * Based on the current state of the interrupt (enabled, pending, + * active, vcpu and target_vcpu), compute the next vcpu this should be + * given to. Return NULL if this shouldn't be injected at all. + * + * Requires the IRQ lock to be held. + */ +static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) +{ + lockdep_assert_held(&irq->irq_lock); + + /* If the interrupt is active, it must stay on the current vcpu */ + if (irq->active) + return irq->vcpu ? : irq->target_vcpu; + + /* + * If the IRQ is not active but enabled and pending, we should direct + * it to its configured target VCPU. + * If the distributor is disabled, pending interrupts shouldn't be + * forwarded. + */ + if (irq->enabled && irq_is_pending(irq)) { + if (unlikely(irq->target_vcpu && + !irq->target_vcpu->kvm->arch.vgic.enabled)) + return NULL; + + return irq->target_vcpu; + } + + /* If neither active nor pending and enabled, then this IRQ should not + * be queued to any VCPU. + */ + return NULL; +} + +/* + * The order of items in the ap_lists defines how we'll pack things in LRs as + * well, the first items in the list being the first things populated in the + * LRs. + * + * A hard rule is that active interrupts can never be pushed out of the LRs + * (and therefore take priority) since we cannot reliably trap on deactivation + * of IRQs and therefore they have to be present in the LRs. + * + * Otherwise things should be sorted by the priority field and the GIC + * hardware support will take care of preemption of priority groups etc. + * + * Return negative if "a" sorts before "b", 0 to preserve order, and positive + * to sort "b" before "a". + */ +static int vgic_irq_cmp(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); + struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); + bool penda, pendb; + int ret; + + /* + * list_sort may call this function with the same element when + * the list is fairly long. + */ + if (unlikely(irqa == irqb)) + return 0; + + raw_spin_lock(&irqa->irq_lock); + raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); + + if (irqa->active || irqb->active) { + ret = (int)irqb->active - (int)irqa->active; + goto out; + } + + penda = irqa->enabled && irq_is_pending(irqa); + pendb = irqb->enabled && irq_is_pending(irqb); + + if (!penda || !pendb) { + ret = (int)pendb - (int)penda; + goto out; + } + + /* Both pending and enabled, sort by priority */ + ret = irqa->priority - irqb->priority; +out: + raw_spin_unlock(&irqb->irq_lock); + raw_spin_unlock(&irqa->irq_lock); + return ret; +} + +/* Must be called with the ap_list_lock held */ +static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); +} + +/* + * Only valid injection if changing level for level-triggered IRQs or for a + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. + */ +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) +{ + if (irq->owner != owner) + return false; + + switch (irq->config) { + case VGIC_CONFIG_LEVEL: + return irq->line_level != level; + case VGIC_CONFIG_EDGE: + return level; + } + + return false; +} + +/* + * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. + * Do the queuing if necessary, taking the right locks in the right order. + * Returns true when the IRQ was queued, false otherwise. + * + * Needs to be entered with the IRQ lock already held, but will return + * with all locks dropped. + */ +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) +{ + struct kvm_vcpu *vcpu; + + lockdep_assert_held(&irq->irq_lock); + +retry: + vcpu = vgic_target_oracle(irq); + if (irq->vcpu || !vcpu) { + /* + * If this IRQ is already on a VCPU's ap_list, then it + * cannot be moved or modified and there is no more work for + * us to do. + * + * Otherwise, if the irq is not pending and enabled, it does + * not need to be inserted into an ap_list and there is also + * no more work for us to do. + */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + return false; + } + + /* + * We must unlock the irq lock to take the ap_list_lock where + * we are going to insert this new pending interrupt. + */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* someone can do stuff here, which we re-check below */ + + raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_lock(&irq->irq_lock); + + /* + * Did something change behind our backs? + * + * There are two cases: + * 1) The irq lost its pending state or was disabled behind our + * backs and/or it was queued to another VCPU's ap_list. + * 2) Someone changed the affinity on this irq behind our + * backs and we are now holding the wrong ap_list_lock. + * + * In both cases, drop the locks and retry. + */ + + if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, + flags); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + goto retry; + } + + /* + * Grab a reference to the irq to reflect the fact that it is + * now in the ap_list. + */ + vgic_get_irq_kref(irq); + list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); + irq->vcpu = vcpu; + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return true; +} + +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @intid: The INTID to inject a new state to. + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: raise the input signal + * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. + * + * The VGIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level, void *owner) +{ + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + unsigned long flags; + int ret; + + trace_vgic_update_irq_pending(cpuid, intid, level); + + ret = vgic_lazy_init(kvm); + if (ret) + return ret; + + vcpu = kvm_get_vcpu(kvm, cpuid); + if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + irq = vgic_get_irq(kvm, vcpu, intid); + if (!irq) + return -EINVAL; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!vgic_validate_injection(irq, level, owner)) { + /* Nothing to see here, move along... */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(kvm, irq); + return 0; + } + + if (irq->config == VGIC_CONFIG_LEVEL) + irq->line_level = level; + else + irq->pending_latch = true; + + vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); + + return 0; +} + +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + unsigned int host_irq, + struct irq_ops *ops) +{ + struct irq_desc *desc; + struct irq_data *data; + + /* + * Find the physical IRQ number corresponding to @host_irq + */ + desc = irq_to_desc(host_irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return -EINVAL; + } + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + irq->hw = true; + irq->host_irq = host_irq; + irq->hwintid = data->hwirq; + irq->ops = ops; + return 0; +} + +/* @irq->irq_lock must be held */ +static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ + irq->hw = false; + irq->hwintid = 0; + irq->ops = NULL; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, + u32 vintid, struct irq_ops *ops) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret; + + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return ret; +} + +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. + */ +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (!irq->hw) + goto out; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->active = false; + irq->pending_latch = false; + irq->line_level = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +out: + vgic_put_irq(vcpu->kvm, irq); +} + +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + kvm_vgic_unmap_irq(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return 0; +} + +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + +/** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + unsigned long flags; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + return ret; +} + +/** + * vgic_prune_ap_list - Remove non-relevant interrupts from the list + * + * @vcpu: The VCPU pointer + * + * Go over the list of "interesting" interrupts, and prune those that we + * won't have to consider in the near future. + */ +static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + +retry: + raw_spin_lock(&vgic_cpu->ap_list_lock); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; + bool target_vcpu_needs_kick = false; + + raw_spin_lock(&irq->irq_lock); + + BUG_ON(vcpu != irq->vcpu); + + target_vcpu = vgic_target_oracle(irq); + + if (!target_vcpu) { + /* + * We don't need to process this interrupt any + * further, move it off the list. + */ + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + + /* + * This vgic_put_irq call matches the + * vgic_get_irq_kref in vgic_queue_irq_unlock, + * where we added the LPI to the ap_list. As + * we remove the irq from the list, we drop + * also drop the refcount. + */ + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + if (target_vcpu == vcpu) { + /* We're on the right CPU */ + raw_spin_unlock(&irq->irq_lock); + continue; + } + + /* This interrupt looks like it has to be migrated. */ + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vgic_cpu->ap_list_lock); + + /* + * Ensure locking order by always locking the smallest + * ID first. + */ + if (vcpu->vcpu_id < target_vcpu->vcpu_id) { + vcpuA = vcpu; + vcpuB = target_vcpu; + } else { + vcpuA = target_vcpu; + vcpuB = vcpu; + } + + raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, + SINGLE_DEPTH_NESTING); + raw_spin_lock(&irq->irq_lock); + + /* + * If the affinity has been preserved, move the + * interrupt around. Otherwise, it means things have + * changed while the interrupt was unlocked, and we + * need to replay this. + * + * In all cases, we cannot trust the list not to have + * changed, so we restart from the beginning. + */ + if (target_vcpu == vgic_target_oracle(irq)) { + struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; + + list_del(&irq->ap_list); + irq->vcpu = target_vcpu; + list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); + target_vcpu_needs_kick = true; + } + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + + if (target_vcpu_needs_kick) { + kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); + kvm_vcpu_kick(target_vcpu); + } + + goto retry; + } + + raw_spin_unlock(&vgic_cpu->ap_list_lock); +} + +static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_fold_lr_state(vcpu); + else + vgic_v3_fold_lr_state(vcpu); +} + +/* Requires the irq_lock to be held. */ +static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, + struct vgic_irq *irq, int lr) +{ + lockdep_assert_held(&irq->irq_lock); + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_populate_lr(vcpu, irq, lr); + else + vgic_v3_populate_lr(vcpu, irq, lr); +} + +static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_clear_lr(vcpu, lr); + else + vgic_v3_clear_lr(vcpu, lr); +} + +static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_underflow(vcpu); + else + vgic_v3_set_underflow(vcpu); +} + +/* Requires the ap_list_lock to be held. */ +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count = 0; + + *multi_sgi = false; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + int w; + + raw_spin_lock(&irq->irq_lock); + /* GICv2 SGIs can count for more than one... */ + w = vgic_irq_get_lr_count(irq); + raw_spin_unlock(&irq->irq_lock); + + count += w; + *multi_sgi |= (w > 1); + } + return count; +} + +/* Requires the VCPU's ap_list_lock to be held. */ +static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count; + bool multi_sgi; + u8 prio = 0xff; + int i = 0; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) + vgic_sort_ap_list(vcpu); + + count = 0; + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + + /* + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. + */ + if (multi_sgi && irq->priority > prio) { + _raw_spin_unlock(&irq->irq_lock); + break; + } + + if (likely(vgic_target_oracle(irq) == vcpu)) { + vgic_populate_lr(vcpu, irq, count++); + + if (irq->source) + prio = irq->priority; + } + + raw_spin_unlock(&irq->irq_lock); + + if (count == kvm_vgic_global_state.nr_lr) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + vgic_set_underflow(vcpu); + break; + } + } + + /* Nuke remaining LRs */ + for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) + vgic_clear_lr(vcpu, i); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; + else + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; +} + +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. + */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Sync back the hardware VGIC state into our emulation after a guest's run. */ +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + int used_lrs; + + /* An empty ap_list_head implies used_lrs == 0 */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) + return; + + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + else + used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + + if (used_lrs) + vgic_fold_lr_state(vcpu); + vgic_prune_ap_list(vcpu); +} + +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Flush our emulation state into the GIC hardware before entering the guest. */ +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + /* + * If there are no virtual interrupts active or pending for this + * VCPU, then there is no work to do and we can bail out without + * taking any lock. There is a potential race with someone injecting + * interrupts to the VCPU, but it is a benign race as the VCPU will + * either observe the new interrupt before or after doing this check, + * and introducing additional synchronization mechanism doesn't change + * this. + * + * Note that we still need to go through the whole thing if anything + * can be directly injected (GICv4). + */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && + !vgic_supports_direct_msis(vcpu->kvm)) + return; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + vgic_flush_lr_state(vcpu); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + } + + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); + + if (vgic_supports_direct_msis(vcpu->kvm)) + vgic_v4_commit(vcpu); +} + +void kvm_vgic_load(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +} + +void kvm_vgic_put(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +} + +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) +{ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_vmcr_sync(vcpu); + else + vgic_v3_vmcr_sync(vcpu); +} + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + bool pending = false; + unsigned long flags; + struct vgic_vmcr vmcr; + + if (!vcpu->kvm->arch.vgic.enabled) + return false; + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) + return true; + + vgic_get_vmcr(vcpu, &vmcr); + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + pending = irq_is_pending(irq) && irq->enabled && + !irq->active && + irq->priority < vmcr.pmr; + raw_spin_unlock(&irq->irq_lock); + + if (pending) + break; + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + + return pending; +} + +void vgic_kick_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + unsigned long c; + + /* + * We've injected an interrupt, time to find out who deserves + * a good kick... + */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + } +} + +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + bool map_is_active; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return false; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + map_is_active = irq->hw && irq->active; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return map_is_active; +} + +/* + * Level-triggered mapped IRQs are special because we only observe rising + * edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample the physical + * line and set the line level, because the device state could have changed + * or we simply need to process the still pending interrupt later. + * + * We could also have entered the guest with the interrupt active+pending. + * On the next exit, we need to re-evaluate the pending state, as it could + * otherwise result in a spurious interrupt by injecting a now potentially + * stale pending state. + * + * If this causes us to lower the level, we have to also clear the physical + * active state, since we will otherwise never be told when the interrupt + * becomes asserted again. + * + * Another case is when the interrupt requires a helping hand on + * deactivation (no HW deactivation, for example). + */ +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending) +{ + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (unlikely(vgic_irq_needs_resampling(irq))) { + resample = !(irq->active || irq->pending_latch); + } else if (lr_pending || (lr_deactivated && irq->line_level)) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } + + if (resample) + vgic_irq_set_phys_active(irq, false); + } +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h new file mode 100644 index 0000000000..8d134569d0 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.h @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ +#ifndef __KVM_ARM_VGIC_NEW_H__ +#define __KVM_ARM_VGIC_NEW_H__ + +#include <linux/irqchip/arm-gic-common.h> +#include <asm/kvm_mmu.h> + +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +#define INTERRUPT_ID_BITS_SPIS 10 +#define INTERRUPT_ID_BITS_ITS 16 +#define VGIC_PRI_BITS 5 + +#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) + +#define VGIC_AFFINITY_0_SHIFT 0 +#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT) +#define VGIC_AFFINITY_1_SHIFT 8 +#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT) +#define VGIC_AFFINITY_2_SHIFT 16 +#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT) +#define VGIC_AFFINITY_3_SHIFT 24 +#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT) + +#define VGIC_AFFINITY_LEVEL(reg, level) \ + ((((reg) & VGIC_AFFINITY_## level ##_MASK) \ + >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/* + * The Userspace encodes the affinity differently from the MPIDR, + * Below macro converts vgic userspace format to MPIDR reg format. + */ +#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \ + VGIC_AFFINITY_LEVEL(val, 1) | \ + VGIC_AFFINITY_LEVEL(val, 2) | \ + VGIC_AFFINITY_LEVEL(val, 3)) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst, + * below macros are defined for CPUREG encoding. + */ +#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 + +#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, + * below macros are defined for ITS table entry encoding. + */ +#define KVM_ITS_CTE_VALID_SHIFT 63 +#define KVM_ITS_CTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_CTE_RDBASE_SHIFT 16 +#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_ITE_NEXT_SHIFT 48 +#define KVM_ITS_ITE_PINTID_SHIFT 16 +#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16) +#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_DTE_VALID_SHIFT 63 +#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_DTE_NEXT_SHIFT 49 +#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49) +#define KVM_ITS_DTE_ITTADDR_SHIFT 5 +#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5) +#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0) +#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63) +/* we only support 64 kB translation table page size */ +#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) + +#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0) +#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12) +#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12 +#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) +#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 + +#ifdef CONFIG_DEBUG_SPINLOCK +#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) +#else +#define DEBUG_SPINLOCK_BUG_ON(p) +#endif + +static inline u32 vgic_get_implementation_rev(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.vgic.implementation_rev; +} + +/* Requires the irq_lock to be held by the caller. */ +static inline bool irq_is_pending(struct vgic_irq *irq) +{ + if (irq->config == VGIC_CONFIG_EDGE) + return irq->pending_latch; + else + return irq->pending_latch || irq->line_level; +} + +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) +{ + return irq->config == VGIC_CONFIG_LEVEL && irq->hw; +} + +static inline int vgic_irq_get_lr_count(struct vgic_irq *irq) +{ + /* Account for the active state as an interrupt */ + if (vgic_irq_is_sgi(irq->intid) && irq->source) + return hweight8(irq->source) + irq->active; + + return irq_is_pending(irq) || irq->active; +} + +static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq) +{ + return vgic_irq_get_lr_count(irq) > 1; +} + +static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret; + + dist->table_write_in_progress = true; + ret = kvm_write_guest_lock(kvm, gpa, data, len); + dist->table_write_in_progress = false; + + return ret; +} + +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. + */ +struct vgic_vmcr { + u32 grpen0; + u32 grpen1; + + u32 ackctl; + u32 fiqen; + u32 cbpr; + u32 eoim; + + u32 abpr; + u32 bpr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ +}; + +struct vgic_reg_attr { + struct kvm_vcpu *vcpu; + gpa_t addr; +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid); +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); +void vgic_kick_vcpus(struct kvm *kvm); +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending); + +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size); + +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_enable(struct kvm_vcpu *vcpu); +int vgic_v2_probe(const struct gic_kvm_info *info); +int vgic_v2_map_resources(struct kvm *kvm); +int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type); + +void vgic_v2_init_lrs(void); +void vgic_v2_load(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); + +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return; + + kref_get(&irq->refcount); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_enable(struct kvm_vcpu *vcpu); +int vgic_v3_probe(const struct gic_kvm_info *info); +int vgic_v3_map_resources(struct kvm *kvm); +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); +int vgic_v3_save_pending_tables(struct kvm *kvm); +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); +bool vgic_v3_check_base(struct kvm *kvm); + +void vgic_v3_load(struct kvm_vcpu *vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu); +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); + +bool vgic_has_its(struct kvm *kvm); +int kvm_vgic_register_its_device(void); +void vgic_enable_lpis(struct kvm_vcpu *vcpu); +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu); +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr, bool is_write); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u32 *val); +int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +int vgic_lazy_init(struct kvm *kvm); +int vgic_init(struct kvm *kvm); + +void vgic_debug_init(struct kvm *kvm); +void vgic_debug_destroy(struct kvm *kvm); + +static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; + + /* + * num_pri_bits are initialized with HW supported values. + * We can rely safely on num_pri_bits even if VM has not + * restored ICC_CTLR_EL1 before restoring APnR registers. + */ + switch (cpu_if->num_pri_bits) { + case 7: return 3; + case 6: return 1; + default: return 0; + } +} + +static inline bool +vgic_v3_redist_region_full(struct vgic_redist_region *region) +{ + if (!region->count) + return false; + + return (region->free_index >= region->count); +} + +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs); + +static inline size_t +vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) +{ + if (!rdreg->count) + return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE; + else + return rdreg->count * KVM_VGIC_V3_REDIST_SIZE; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index); +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg); + +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); + +static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + + return (base + size > d->vgic_dist_base) && + (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); +} + +bool vgic_lpis_enabled(struct kvm_vcpu *vcpu); +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq); +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); +void vgic_lpi_translation_cache_init(struct kvm *kvm); +void vgic_lpi_translation_cache_destroy(struct kvm *kvm); +void vgic_its_invalidate_cache(struct kvm *kvm); + +/* GICv4.1 MMIO interface */ +int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq); +int vgic_its_invall(struct kvm_vcpu *vcpu); + +bool vgic_supports_direct_msis(struct kvm *kvm); +int vgic_v4_init(struct kvm *kvm); +void vgic_v4_teardown(struct kvm *kvm); +void vgic_v4_configure_vsgis(struct kvm *kvm); +void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); + +#endif |