diff options
Diffstat (limited to '')
-rw-r--r-- | kernel/irq/Kconfig | 152 | ||||
-rw-r--r-- | kernel/irq/Makefile | 21 | ||||
-rw-r--r-- | kernel/irq/affinity.c | 128 | ||||
-rw-r--r-- | kernel/irq/autoprobe.c | 184 | ||||
-rw-r--r-- | kernel/irq/chip.c | 1619 | ||||
-rw-r--r-- | kernel/irq/cpuhotplug.c | 233 | ||||
-rw-r--r-- | kernel/irq/debug.h | 49 | ||||
-rw-r--r-- | kernel/irq/debugfs.c | 263 | ||||
-rw-r--r-- | kernel/irq/devres.c | 284 | ||||
-rw-r--r-- | kernel/irq/dummychip.c | 64 | ||||
-rw-r--r-- | kernel/irq/generic-chip.c | 668 | ||||
-rw-r--r-- | kernel/irq/handle.c | 242 | ||||
-rw-r--r-- | kernel/irq/internals.h | 521 | ||||
-rw-r--r-- | kernel/irq/ipi-mux.c | 206 | ||||
-rw-r--r-- | kernel/irq/ipi.c | 345 | ||||
-rw-r--r-- | kernel/irq/irq_sim.c | 250 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 1014 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 1978 | ||||
-rw-r--r-- | kernel/irq/manage.c | 2942 | ||||
-rw-r--r-- | kernel/irq/matrix.c | 513 | ||||
-rw-r--r-- | kernel/irq/migration.c | 119 | ||||
-rw-r--r-- | kernel/irq/msi.c | 1667 | ||||
-rw-r--r-- | kernel/irq/pm.c | 260 | ||||
-rw-r--r-- | kernel/irq/proc.c | 538 | ||||
-rw-r--r-- | kernel/irq/resend.c | 202 | ||||
-rw-r--r-- | kernel/irq/settings.h | 188 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 478 | ||||
-rw-r--r-- | kernel/irq/timings.c | 958 | ||||
-rw-r--r-- | kernel/irq_work.c | 327 |
29 files changed, 16413 insertions, 0 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig new file mode 100644 index 0000000000..2531f3496a --- /dev/null +++ b/kernel/irq/Kconfig @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "IRQ subsystem" +# Options selectable by the architecture code + +# Make sparse irq Kconfig switch below available +config MAY_HAVE_SPARSE_IRQ + bool + +# Legacy support, required for itanic +config GENERIC_IRQ_LEGACY + bool + +# Enable the generic irq autoprobe mechanism +config GENERIC_IRQ_PROBE + bool + +# Use the generic /proc/interrupts implementation +config GENERIC_IRQ_SHOW + bool + +# Print level/edge extra information +config GENERIC_IRQ_SHOW_LEVEL + bool + +# Supports effective affinity mask +config GENERIC_IRQ_EFFECTIVE_AFF_MASK + depends on SMP + bool + +# Support for delayed migration from interrupt context +config GENERIC_PENDING_IRQ + bool + +# Support for generic irq migrating off cpu before the cpu is offline. +config GENERIC_IRQ_MIGRATION + bool + +# Alpha specific irq affinity mechanism +config AUTO_IRQ_AFFINITY + bool + +# Interrupt injection mechanism +config GENERIC_IRQ_INJECTION + bool + +# Tasklet based software resend for pending interrupts on enable_irq() +config HARDIRQS_SW_RESEND + bool + +# Edge style eoi based handler (cell) +config IRQ_EDGE_EOI_HANDLER + bool + +# Generic configurable interrupt chip implementation +config GENERIC_IRQ_CHIP + bool + select IRQ_DOMAIN + +# Generic irq_domain hw <--> linux irq number translation +config IRQ_DOMAIN + bool + +# Support for simulated interrupts +config IRQ_SIM + bool + select IRQ_WORK + select IRQ_DOMAIN + +# Support for hierarchical irq domains +config IRQ_DOMAIN_HIERARCHY + bool + select IRQ_DOMAIN + +# Support for obsolete non-mapping irq domains +config IRQ_DOMAIN_NOMAP + bool + select IRQ_DOMAIN + +# Support for hierarchical fasteoi+edge and fasteoi+level handlers +config IRQ_FASTEOI_HIERARCHY_HANDLERS + bool + +# Generic IRQ IPI support +config GENERIC_IRQ_IPI + bool + depends on SMP + select IRQ_DOMAIN_HIERARCHY + +# Generic IRQ IPI Mux support +config GENERIC_IRQ_IPI_MUX + bool + depends on SMP + +# Generic MSI hierarchical interrupt domain support +config GENERIC_MSI_IRQ + bool + select IRQ_DOMAIN_HIERARCHY + +config IRQ_MSI_IOMMU + bool + +config IRQ_TIMINGS + bool + +config GENERIC_IRQ_MATRIX_ALLOCATOR + bool + +config GENERIC_IRQ_RESERVATION_MODE + bool + +# Support forced irq threading +config IRQ_FORCED_THREADING + bool + +config SPARSE_IRQ + bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ + help + + Sparse irq numbering is useful for distro kernels that want + to define a high CONFIG_NR_CPUS value but still want to have + low kernel memory footprint on smaller machines. + + ( Sparse irqs can also be beneficial on NUMA boxes, as they spread + out the interrupt descriptors in a more NUMA-friendly way. ) + + If you don't know what to do here, say N. + +config GENERIC_IRQ_DEBUGFS + bool "Expose irq internals in debugfs" + depends on DEBUG_FS + select GENERIC_IRQ_INJECTION + default n + help + + Exposes internal state information through debugfs. Mostly for + developers and debugging of hard to diagnose interrupt problems. + + If you don't know what to do here, say N. + +endmenu + +config GENERIC_IRQ_MULTI_HANDLER + bool + help + Allow to specify the low level IRQ handler at run time. + +# Cavium Octeon is the last system to use this deprecated option +# Do not even think of enabling this on any new platform +config DEPRECATED_IRQ_CPU_ONOFFLINE + bool + depends on CAVIUM_OCTEON_SOC + default CAVIUM_OCTEON_SOC diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile new file mode 100644 index 0000000000..f19d3080bf --- /dev/null +++ b/kernel/irq/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-$(CONFIG_IRQ_TIMINGS) += timings.o +ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) + CFLAGS_timings.o += -DDEBUG +endif +obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o +obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o +obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o +obj-$(CONFIG_IRQ_SIM) += irq_sim.o +obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o +obj-$(CONFIG_PM_SLEEP) += pm.o +obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o +obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o +obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o +obj-$(CONFIG_SMP) += affinity.o +obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o +obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c new file mode 100644 index 0000000000..44a4eba803 --- /dev/null +++ b/kernel/irq/affinity.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016 Thomas Gleixner. + * Copyright (C) 2016-2017 Christoph Hellwig. + */ +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/group_cpus.h> + +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) +{ + affd->nr_sets = 1; + affd->set_size[0] = affvecs; +} + +/** + * irq_create_affinity_masks - Create affinity masks for multiqueue spreading + * @nvecs: The total number of vectors + * @affd: Description of the affinity requirements + * + * Returns the irq_affinity_desc pointer or NULL if allocation failed. + */ +struct irq_affinity_desc * +irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) +{ + unsigned int affvecs, curvec, usedvecs, i; + struct irq_affinity_desc *masks = NULL; + + /* + * Determine the number of vectors which need interrupt affinities + * assigned. If the pre/post request exhausts the available vectors + * then nothing to do here except for invoking the calc_sets() + * callback so the device driver can adjust to the situation. + */ + if (nvecs > affd->pre_vectors + affd->post_vectors) + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; + else + affvecs = 0; + + /* + * Simple invocations do not provide a calc_sets() callback. Install + * the generic one. + */ + if (!affd->calc_sets) + affd->calc_sets = default_calc_sets; + + /* Recalculate the sets */ + affd->calc_sets(affd, affvecs); + + if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) + return NULL; + + /* Nothing to assign? */ + if (!affvecs) + return NULL; + + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); + if (!masks) + return NULL; + + /* Fill out vectors at the beginning that don't need affinity */ + for (curvec = 0; curvec < affd->pre_vectors; curvec++) + cpumask_copy(&masks[curvec].mask, irq_default_affinity); + + /* + * Spread on present CPUs starting from affd->pre_vectors. If we + * have multiple sets, build each sets affinity mask separately. + */ + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { + unsigned int this_vecs = affd->set_size[i]; + int j; + struct cpumask *result = group_cpus_evenly(this_vecs); + + if (!result) { + kfree(masks); + return NULL; + } + + for (j = 0; j < this_vecs; j++) + cpumask_copy(&masks[curvec + j].mask, &result[j]); + kfree(result); + + curvec += this_vecs; + usedvecs += this_vecs; + } + + /* Fill out vectors at the end that don't need affinity */ + if (usedvecs >= affvecs) + curvec = affd->pre_vectors + affvecs; + else + curvec = affd->pre_vectors + usedvecs; + for (; curvec < nvecs; curvec++) + cpumask_copy(&masks[curvec].mask, irq_default_affinity); + + /* Mark the managed interrupts */ + for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++) + masks[i].is_managed = 1; + + return masks; +} + +/** + * irq_calc_affinity_vectors - Calculate the optimal number of vectors + * @minvec: The minimum number of vectors available + * @maxvec: The maximum number of vectors available + * @affd: Description of the affinity requirements + */ +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) +{ + unsigned int resv = affd->pre_vectors + affd->post_vectors; + unsigned int set_vecs; + + if (resv > minvec) + return 0; + + if (affd->calc_sets) { + set_vecs = maxvec - resv; + } else { + cpus_read_lock(); + set_vecs = cpumask_weight(cpu_possible_mask); + cpus_read_unlock(); + } + + return resv + min(set_vecs, maxvec - resv); +} diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c new file mode 100644 index 0000000000..ae60cae24e --- /dev/null +++ b/kernel/irq/autoprobe.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar + * + * This file contains the interrupt probing code and driver APIs. + */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/async.h> + +#include "internals.h" + +/* + * Autodetection depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck with + * "IRQS_WAITING" cleared and the interrupt disabled. + */ +static DEFINE_MUTEX(probing_active); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ +unsigned long probe_irq_on(void) +{ + struct irq_desc *desc; + unsigned long mask = 0; + int i; + + /* + * quiesce the kernel, or at least the asynchronous portion + */ + async_synchronize_full(); + mutex_lock(&probing_active); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for_each_irq_desc_reverse(i, desc) { + raw_spin_lock_irq(&desc->lock); + if (!desc->action && irq_settings_can_probe(desc)) { + /* + * Some chips need to know about probing in + * progress: + */ + if (desc->irq_data.chip->irq_set_type) + desc->irq_data.chip->irq_set_type(&desc->irq_data, + IRQ_TYPE_PROBE); + irq_activate_and_startup(desc, IRQ_NORESEND); + } + raw_spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. */ + msleep(20); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for_each_irq_desc_reverse(i, desc) { + raw_spin_lock_irq(&desc->lock); + if (!desc->action && irq_settings_can_probe(desc)) { + desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; + if (irq_activate_and_startup(desc, IRQ_NORESEND)) + desc->istate |= IRQS_PENDING; + } + raw_spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger + */ + msleep(100); + + /* + * Now filter out any obviously spurious interrupts + */ + for_each_irq_desc(i, desc) { + raw_spin_lock_irq(&desc->lock); + + if (desc->istate & IRQS_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(desc->istate & IRQS_WAITING)) { + desc->istate &= ~IRQS_AUTODETECT; + irq_shutdown_and_deactivate(desc); + } else + if (i < 32) + mask |= 1 << i; + } + raw_spin_unlock_irq(&desc->lock); + } + + return mask; +} +EXPORT_SYMBOL(probe_irq_on); + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the interrupt lines and return a bitmap of active + * autodetect interrupts. The interrupt probe logic state + * is then returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return autodetect irq numbers - just so that we reset + * them all to a known state. + */ +unsigned int probe_irq_mask(unsigned long val) +{ + unsigned int mask = 0; + struct irq_desc *desc; + int i; + + for_each_irq_desc(i, desc) { + raw_spin_lock_irq(&desc->lock); + if (desc->istate & IRQS_AUTODETECT) { + if (i < 16 && !(desc->istate & IRQS_WAITING)) + mask |= 1 << i; + + desc->istate &= ~IRQS_AUTODETECT; + irq_shutdown_and_deactivate(desc); + } + raw_spin_unlock_irq(&desc->lock); + } + mutex_unlock(&probing_active); + + return mask & val; +} +EXPORT_SYMBOL(probe_irq_mask); + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldn't happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. + */ +int probe_irq_off(unsigned long val) +{ + int i, irq_found = 0, nr_of_irqs = 0; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + raw_spin_lock_irq(&desc->lock); + + if (desc->istate & IRQS_AUTODETECT) { + if (!(desc->istate & IRQS_WAITING)) { + if (!nr_of_irqs) + irq_found = i; + nr_of_irqs++; + } + desc->istate &= ~IRQS_AUTODETECT; + irq_shutdown_and_deactivate(desc); + } + raw_spin_unlock_irq(&desc->lock); + } + mutex_unlock(&probing_active); + + if (nr_of_irqs > 1) + irq_found = -irq_found; + + return irq_found; +} +EXPORT_SYMBOL(probe_irq_off); + diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c new file mode 100644 index 0000000000..dc94e0bf2c --- /dev/null +++ b/kernel/irq/chip.c @@ -0,0 +1,1619 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code, for irq-chip based + * architectures. Detailed information is available in + * Documentation/core-api/genericirq.rst + */ + +#include <linux/irq.h> +#include <linux/msi.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/irqdomain.h> + +#include <trace/events/irq.h> + +#include "internals.h" + +static irqreturn_t bad_chained_irq(int irq, void *dev_id) +{ + WARN_ONCE(1, "Chained irq %d should not call an action\n", irq); + return IRQ_NONE; +} + +/* + * Chained handlers should never call action on their IRQ. This default + * action will emit warning if such thing happens. + */ +struct irqaction chained_action = { + .handler = bad_chained_irq, +}; + +/** + * irq_set_chip - set the irq chip for an irq + * @irq: irq number + * @chip: pointer to irq chip description structure + */ +int irq_set_chip(unsigned int irq, const struct irq_chip *chip) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + + if (!desc) + return -EINVAL; + + desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); + irq_put_desc_unlock(desc, flags); + /* + * For !CONFIG_SPARSE_IRQ make the irq show up in + * allocated_irqs. + */ + irq_mark_irq(irq); + return 0; +} +EXPORT_SYMBOL(irq_set_chip); + +/** + * irq_set_irq_type - set the irq trigger type for an irq + * @irq: irq number + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + */ +int irq_set_irq_type(unsigned int irq, unsigned int type) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + int ret = 0; + + if (!desc) + return -EINVAL; + + ret = __irq_set_trigger(desc, type); + irq_put_desc_busunlock(desc, flags); + return ret; +} +EXPORT_SYMBOL(irq_set_irq_type); + +/** + * irq_set_handler_data - set irq handler data for an irq + * @irq: Interrupt number + * @data: Pointer to interrupt specific data + * + * Set the hardware irq controller data for an irq + */ +int irq_set_handler_data(unsigned int irq, void *data) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + + if (!desc) + return -EINVAL; + desc->irq_common_data.handler_data = data; + irq_put_desc_unlock(desc, flags); + return 0; +} +EXPORT_SYMBOL(irq_set_handler_data); + +/** + * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset + * @irq_base: Interrupt number base + * @irq_offset: Interrupt number offset + * @entry: Pointer to MSI descriptor data + * + * Set the MSI descriptor entry for an irq at offset + */ +int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc) + return -EINVAL; + desc->irq_common_data.msi_desc = entry; + if (entry && !irq_offset) + entry->irq = irq_base; + irq_put_desc_unlock(desc, flags); + return 0; +} + +/** + * irq_set_msi_desc - set MSI descriptor data for an irq + * @irq: Interrupt number + * @entry: Pointer to MSI descriptor data + * + * Set the MSI descriptor entry for an irq + */ +int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) +{ + return irq_set_msi_desc_off(irq, 0, entry); +} + +/** + * irq_set_chip_data - set irq chip data for an irq + * @irq: Interrupt number + * @data: Pointer to chip specific data + * + * Set the hardware irq chip data for an irq + */ +int irq_set_chip_data(unsigned int irq, void *data) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + + if (!desc) + return -EINVAL; + desc->irq_data.chip_data = data; + irq_put_desc_unlock(desc, flags); + return 0; +} +EXPORT_SYMBOL(irq_set_chip_data); + +struct irq_data *irq_get_irq_data(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return desc ? &desc->irq_data : NULL; +} +EXPORT_SYMBOL_GPL(irq_get_irq_data); + +static void irq_state_clr_disabled(struct irq_desc *desc) +{ + irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); +} + +static void irq_state_clr_masked(struct irq_desc *desc) +{ + irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); +} + +static void irq_state_clr_started(struct irq_desc *desc) +{ + irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); +} + +static void irq_state_set_started(struct irq_desc *desc) +{ + irqd_set(&desc->irq_data, IRQD_IRQ_STARTED); +} + +enum { + IRQ_STARTUP_NORMAL, + IRQ_STARTUP_MANAGED, + IRQ_STARTUP_ABORT, +}; + +#ifdef CONFIG_SMP +static int +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + if (!irqd_affinity_is_managed(d)) + return IRQ_STARTUP_NORMAL; + + irqd_clr_managed_shutdown(d); + + if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { + /* + * Catch code which fiddles with enable_irq() on a managed + * and potentially shutdown IRQ. Chained interrupt + * installment or irq auto probing should not happen on + * managed irqs either. + */ + if (WARN_ON_ONCE(force)) + return IRQ_STARTUP_ABORT; + /* + * The interrupt was requested, but there is no online CPU + * in it's affinity mask. Put it into managed shutdown + * state and let the cpu hotplug mechanism start it up once + * a CPU in the mask becomes available. + */ + return IRQ_STARTUP_ABORT; + } + /* + * Managed interrupts have reserved resources, so this should not + * happen. + */ + if (WARN_ON(irq_domain_activate_irq(d, false))) + return IRQ_STARTUP_ABORT; + return IRQ_STARTUP_MANAGED; +} +#else +static __always_inline int +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) +{ + return IRQ_STARTUP_NORMAL; +} +#endif + +static int __irq_startup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + int ret = 0; + + /* Warn if this interrupt is not activated but try nevertheless */ + WARN_ON_ONCE(!irqd_is_activated(d)); + + if (d->chip->irq_startup) { + ret = d->chip->irq_startup(d); + irq_state_clr_disabled(desc); + irq_state_clr_masked(desc); + } else { + irq_enable(desc); + } + irq_state_set_started(desc); + return ret; +} + +int irq_startup(struct irq_desc *desc, bool resend, bool force) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *aff = irq_data_get_affinity_mask(d); + int ret = 0; + + desc->depth = 0; + + if (irqd_is_started(d)) { + irq_enable(desc); + } else { + switch (__irq_startup_managed(desc, aff, force)) { + case IRQ_STARTUP_NORMAL: + if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP) + irq_setup_affinity(desc); + ret = __irq_startup(desc); + if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)) + irq_setup_affinity(desc); + break; + case IRQ_STARTUP_MANAGED: + irq_do_set_affinity(d, aff, false); + ret = __irq_startup(desc); + break; + case IRQ_STARTUP_ABORT: + irqd_set_managed_shutdown(d); + return 0; + } + } + if (resend) + check_irq_resend(desc, false); + + return ret; +} + +int irq_activate(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + if (!irqd_affinity_is_managed(d)) + return irq_domain_activate_irq(d, false); + return 0; +} + +int irq_activate_and_startup(struct irq_desc *desc, bool resend) +{ + if (WARN_ON(irq_activate(desc))) + return 0; + return irq_startup(desc, resend, IRQ_START_FORCE); +} + +static void __irq_disable(struct irq_desc *desc, bool mask); + +void irq_shutdown(struct irq_desc *desc) +{ + if (irqd_is_started(&desc->irq_data)) { + clear_irq_resend(desc); + desc->depth = 1; + if (desc->irq_data.chip->irq_shutdown) { + desc->irq_data.chip->irq_shutdown(&desc->irq_data); + irq_state_set_disabled(desc); + irq_state_set_masked(desc); + } else { + __irq_disable(desc, true); + } + irq_state_clr_started(desc); + } +} + + +void irq_shutdown_and_deactivate(struct irq_desc *desc) +{ + irq_shutdown(desc); + /* + * This must be called even if the interrupt was never started up, + * because the activation can happen before the interrupt is + * available for request/startup. It has it's own state tracking so + * it's safe to call it unconditionally. + */ + irq_domain_deactivate_irq(&desc->irq_data); +} + +void irq_enable(struct irq_desc *desc) +{ + if (!irqd_irq_disabled(&desc->irq_data)) { + unmask_irq(desc); + } else { + irq_state_clr_disabled(desc); + if (desc->irq_data.chip->irq_enable) { + desc->irq_data.chip->irq_enable(&desc->irq_data); + irq_state_clr_masked(desc); + } else { + unmask_irq(desc); + } + } +} + +static void __irq_disable(struct irq_desc *desc, bool mask) +{ + if (irqd_irq_disabled(&desc->irq_data)) { + if (mask) + mask_irq(desc); + } else { + irq_state_set_disabled(desc); + if (desc->irq_data.chip->irq_disable) { + desc->irq_data.chip->irq_disable(&desc->irq_data); + irq_state_set_masked(desc); + } else if (mask) { + mask_irq(desc); + } + } +} + +/** + * irq_disable - Mark interrupt disabled + * @desc: irq descriptor which should be disabled + * + * If the chip does not implement the irq_disable callback, we + * use a lazy disable approach. That means we mark the interrupt + * disabled, but leave the hardware unmasked. That's an + * optimization because we avoid the hardware access for the + * common case where no interrupt happens after we marked it + * disabled. If an interrupt happens, then the interrupt flow + * handler masks the line at the hardware level and marks it + * pending. + * + * If the interrupt chip does not implement the irq_disable callback, + * a driver can disable the lazy approach for a particular irq line by + * calling 'irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY)'. This can + * be used for devices which cannot disable the interrupt at the + * device level under certain circumstances and have to use + * disable_irq[_nosync] instead. + */ +void irq_disable(struct irq_desc *desc) +{ + __irq_disable(desc, irq_settings_disable_unlazy(desc)); +} + +void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) +{ + if (desc->irq_data.chip->irq_enable) + desc->irq_data.chip->irq_enable(&desc->irq_data); + else + desc->irq_data.chip->irq_unmask(&desc->irq_data); + cpumask_set_cpu(cpu, desc->percpu_enabled); +} + +void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) +{ + if (desc->irq_data.chip->irq_disable) + desc->irq_data.chip->irq_disable(&desc->irq_data); + else + desc->irq_data.chip->irq_mask(&desc->irq_data); + cpumask_clear_cpu(cpu, desc->percpu_enabled); +} + +static inline void mask_ack_irq(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_mask_ack) { + desc->irq_data.chip->irq_mask_ack(&desc->irq_data); + irq_state_set_masked(desc); + } else { + mask_irq(desc); + if (desc->irq_data.chip->irq_ack) + desc->irq_data.chip->irq_ack(&desc->irq_data); + } +} + +void mask_irq(struct irq_desc *desc) +{ + if (irqd_irq_masked(&desc->irq_data)) + return; + + if (desc->irq_data.chip->irq_mask) { + desc->irq_data.chip->irq_mask(&desc->irq_data); + irq_state_set_masked(desc); + } +} + +void unmask_irq(struct irq_desc *desc) +{ + if (!irqd_irq_masked(&desc->irq_data)) + return; + + if (desc->irq_data.chip->irq_unmask) { + desc->irq_data.chip->irq_unmask(&desc->irq_data); + irq_state_clr_masked(desc); + } +} + +void unmask_threaded_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + if (chip->flags & IRQCHIP_EOI_THREADED) + chip->irq_eoi(&desc->irq_data); + + unmask_irq(desc); +} + +/* + * handle_nested_irq - Handle a nested irq from a irq thread + * @irq: the interrupt number + * + * Handle interrupts which are nested into a threaded interrupt + * handler. The handler function is called inside the calling + * threads context. + */ +void handle_nested_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + irqreturn_t action_ret; + + might_sleep(); + + raw_spin_lock_irq(&desc->lock); + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + action = desc->action; + if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + raw_spin_unlock_irq(&desc->lock); + return; + } + + kstat_incr_irqs_this_cpu(desc); + atomic_inc(&desc->threads_active); + raw_spin_unlock_irq(&desc->lock); + + action_ret = IRQ_NONE; + for_each_action_of_desc(desc, action) + action_ret |= action->thread_fn(action->irq, action->dev_id); + + if (!irq_settings_no_debug(desc)) + note_interrupt(desc, action_ret); + + wake_threads_waitq(desc); +} +EXPORT_SYMBOL_GPL(handle_nested_irq); + +static bool irq_check_poll(struct irq_desc *desc) +{ + if (!(desc->istate & IRQS_POLL_INPROGRESS)) + return false; + return irq_wait_for_poll(desc); +} + +static bool irq_may_run(struct irq_desc *desc) +{ + unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; + + /* + * If the interrupt is not in progress and is not an armed + * wakeup interrupt, proceed. + */ + if (!irqd_has_set(&desc->irq_data, mask)) + return true; + + /* + * If the interrupt is an armed wakeup source, mark it pending + * and suspended, disable it and notify the pm core about the + * event. + */ + if (irq_pm_check_wakeup(desc)) + return false; + + /* + * Handle a potential concurrent poll on a different core. + */ + return irq_check_poll(desc); +} + +/** + * handle_simple_irq - Simple and software-decoded IRQs. + * @desc: the interrupt description structure for this irq + * + * Simple interrupts are either sent from a demultiplexing interrupt + * handler or come from hardware, where no interrupt hardware control + * is necessary. + * + * Note: The caller is expected to handle the ack, clear, mask and + * unmask issues if necessary. + */ +void handle_simple_irq(struct irq_desc *desc) +{ + raw_spin_lock(&desc->lock); + + if (!irq_may_run(desc)) + goto out_unlock; + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + goto out_unlock; + } + + kstat_incr_irqs_this_cpu(desc); + handle_irq_event(desc); + +out_unlock: + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_simple_irq); + +/** + * handle_untracked_irq - Simple and software-decoded IRQs. + * @desc: the interrupt description structure for this irq + * + * Untracked interrupts are sent from a demultiplexing interrupt + * handler when the demultiplexer does not know which device it its + * multiplexed irq domain generated the interrupt. IRQ's handled + * through here are not subjected to stats tracking, randomness, or + * spurious interrupt detection. + * + * Note: Like handle_simple_irq, the caller is expected to handle + * the ack, clear, mask and unmask issues if necessary. + */ +void handle_untracked_irq(struct irq_desc *desc) +{ + raw_spin_lock(&desc->lock); + + if (!irq_may_run(desc)) + goto out_unlock; + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + goto out_unlock; + } + + desc->istate &= ~IRQS_PENDING; + irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); + raw_spin_unlock(&desc->lock); + + __handle_irq_event_percpu(desc); + + raw_spin_lock(&desc->lock); + irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); + +out_unlock: + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_untracked_irq); + +/* + * Called unconditionally from handle_level_irq() and only for oneshot + * interrupts from handle_fasteoi_irq() + */ +static void cond_unmask_irq(struct irq_desc *desc) +{ + /* + * We need to unmask in the following cases: + * - Standard level irq (IRQF_ONESHOT is not set) + * - Oneshot irq which did not wake the thread (caused by a + * spurious interrupt or a primary handler handling it + * completely). + */ + if (!irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) + unmask_irq(desc); +} + +/** + * handle_level_irq - Level type irq handler + * @desc: the interrupt description structure for this irq + * + * Level type interrupts are active as long as the hardware line has + * the active level. This may require to mask the interrupt and unmask + * it after the associated handler has acknowledged the device, so the + * interrupt line is back to inactive. + */ +void handle_level_irq(struct irq_desc *desc) +{ + raw_spin_lock(&desc->lock); + mask_ack_irq(desc); + + if (!irq_may_run(desc)) + goto out_unlock; + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + /* + * If its disabled or no action available + * keep it masked and get out of here + */ + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + goto out_unlock; + } + + kstat_incr_irqs_this_cpu(desc); + handle_irq_event(desc); + + cond_unmask_irq(desc); + +out_unlock: + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_level_irq); + +static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) +{ + if (!(desc->istate & IRQS_ONESHOT)) { + chip->irq_eoi(&desc->irq_data); + return; + } + /* + * We need to unmask in the following cases: + * - Oneshot irq which did not wake the thread (caused by a + * spurious interrupt or a primary handler handling it + * completely). + */ + if (!irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { + chip->irq_eoi(&desc->irq_data); + unmask_irq(desc); + } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { + chip->irq_eoi(&desc->irq_data); + } +} + +/** + * handle_fasteoi_irq - irq handler for transparent controllers + * @desc: the interrupt description structure for this irq + * + * Only a single callback will be issued to the chip: an ->eoi() + * call when the interrupt has been serviced. This enables support + * for modern forms of interrupt handlers, which handle the flow + * details in hardware, transparently. + */ +void handle_fasteoi_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + raw_spin_lock(&desc->lock); + + /* + * When an affinity change races with IRQ handling, the next interrupt + * can arrive on the new CPU before the original CPU has completed + * handling the previous one - it may need to be resent. + */ + if (!irq_may_run(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; + goto out; + } + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + /* + * If its disabled or no action available + * then mask it and get out of here: + */ + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + mask_irq(desc); + goto out; + } + + kstat_incr_irqs_this_cpu(desc); + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); + + handle_irq_event(desc); + + cond_unmask_eoi_irq(desc, chip); + + /* + * When the race described above happens this will resend the interrupt. + */ + if (unlikely(desc->istate & IRQS_PENDING)) + check_irq_resend(desc, false); + + raw_spin_unlock(&desc->lock); + return; +out: + if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) + chip->irq_eoi(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_irq); + +/** + * handle_fasteoi_nmi - irq handler for NMI interrupt lines + * @desc: the interrupt description structure for this irq + * + * A simple NMI-safe handler, considering the restrictions + * from request_nmi. + * + * Only a single callback will be issued to the chip: an ->eoi() + * call when the interrupt has been serviced. This enables support + * for modern forms of interrupt handlers, which handle the flow + * details in hardware, transparently. + */ +void handle_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + __kstat_incr_irqs_this_cpu(desc); + + trace_irq_handler_entry(irq, action); + /* + * NMIs cannot be shared, there is only one action. + */ + res = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); + +/** + * handle_edge_irq - edge type IRQ handler + * @desc: the interrupt description structure for this irq + * + * Interrupt occurs on the falling and/or rising edge of a hardware + * signal. The occurrence is latched into the irq controller hardware + * and must be acked in order to be reenabled. After the ack another + * interrupt can happen on the same source even before the first one + * is handled by the associated event handler. If this happens it + * might be necessary to disable (mask) the interrupt depending on the + * controller hardware. This requires to reenable the interrupt inside + * of the loop which handles the interrupts which have arrived while + * the handler was running. If all pending interrupts are handled, the + * loop is left. + */ +void handle_edge_irq(struct irq_desc *desc) +{ + raw_spin_lock(&desc->lock); + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + if (!irq_may_run(desc)) { + desc->istate |= IRQS_PENDING; + mask_ack_irq(desc); + goto out_unlock; + } + + /* + * If its disabled or no action available then mask it and get + * out of here. + */ + if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { + desc->istate |= IRQS_PENDING; + mask_ack_irq(desc); + goto out_unlock; + } + + kstat_incr_irqs_this_cpu(desc); + + /* Start handling the irq */ + desc->irq_data.chip->irq_ack(&desc->irq_data); + + do { + if (unlikely(!desc->action)) { + mask_irq(desc); + goto out_unlock; + } + + /* + * When another irq arrived while we were handling + * one, we could have masked the irq. + * Reenable it, if it was not disabled in meantime. + */ + if (unlikely(desc->istate & IRQS_PENDING)) { + if (!irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data)) + unmask_irq(desc); + } + + handle_irq_event(desc); + + } while ((desc->istate & IRQS_PENDING) && + !irqd_irq_disabled(&desc->irq_data)); + +out_unlock: + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL(handle_edge_irq); + +#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER +/** + * handle_edge_eoi_irq - edge eoi type IRQ handler + * @desc: the interrupt description structure for this irq + * + * Similar as the above handle_edge_irq, but using eoi and w/o the + * mask/unmask logic. + */ +void handle_edge_eoi_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + + raw_spin_lock(&desc->lock); + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + if (!irq_may_run(desc)) { + desc->istate |= IRQS_PENDING; + goto out_eoi; + } + + /* + * If its disabled or no action available then mask it and get + * out of here. + */ + if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { + desc->istate |= IRQS_PENDING; + goto out_eoi; + } + + kstat_incr_irqs_this_cpu(desc); + + do { + if (unlikely(!desc->action)) + goto out_eoi; + + handle_irq_event(desc); + + } while ((desc->istate & IRQS_PENDING) && + !irqd_irq_disabled(&desc->irq_data)); + +out_eoi: + chip->irq_eoi(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} +#endif + +/** + * handle_percpu_irq - Per CPU local irq handler + * @desc: the interrupt description structure for this irq + * + * Per CPU interrupts on SMP machines without locking requirements + */ +void handle_percpu_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + + /* + * PER CPU interrupts are not serialized. Do not touch + * desc->tot_count. + */ + __kstat_incr_irqs_this_cpu(desc); + + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + + handle_irq_event_percpu(desc); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + +/** + * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids + * @desc: the interrupt description structure for this irq + * + * Per CPU interrupts on SMP machines without locking requirements. Same as + * handle_percpu_irq() above but with the following extras: + * + * action->percpu_dev_id is a pointer to percpu variables which + * contain the real device id for the cpu on which this handler is + * called + */ +void handle_percpu_devid_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + /* + * PER CPU interrupts are not serialized. Do not touch + * desc->tot_count. + */ + __kstat_incr_irqs_this_cpu(desc); + + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + + if (likely(action)) { + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + } else { + unsigned int cpu = smp_processor_id(); + bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); + + if (enabled) + irq_percpu_disable(desc, cpu); + + pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n", + enabled ? " and unmasked" : "", irq, cpu); + } + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + +/** + * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu + * dev ids + * @desc: the interrupt description structure for this irq + * + * Similar to handle_fasteoi_nmi, but handling the dev_id cookie + * as a percpu pointer. + */ +void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + __kstat_incr_irqs_this_cpu(desc); + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + +static void +__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, + int is_chained, const char *name) +{ + if (!handle) { + handle = handle_bad_irq; + } else { + struct irq_data *irq_data = &desc->irq_data; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* + * With hierarchical domains we might run into a + * situation where the outermost chip is not yet set + * up, but the inner chips are there. Instead of + * bailing we install the handler, but obviously we + * cannot enable/startup the interrupt at this point. + */ + while (irq_data) { + if (irq_data->chip != &no_irq_chip) + break; + /* + * Bail out if the outer chip is not set up + * and the interrupt supposed to be started + * right away. + */ + if (WARN_ON(is_chained)) + return; + /* Try the parent */ + irq_data = irq_data->parent_data; + } +#endif + if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) + return; + } + + /* Uninstall? */ + if (handle == handle_bad_irq) { + if (desc->irq_data.chip != &no_irq_chip) + mask_ack_irq(desc); + irq_state_set_disabled(desc); + if (is_chained) { + desc->action = NULL; + WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); + } + desc->depth = 1; + } + desc->handle_irq = handle; + desc->name = name; + + if (handle != handle_bad_irq && is_chained) { + unsigned int type = irqd_get_trigger_type(&desc->irq_data); + + /* + * We're about to start this interrupt immediately, + * hence the need to set the trigger configuration. + * But the .set_type callback may have overridden the + * flow handler, ignoring that we're dealing with a + * chained interrupt. Reset it immediately because we + * do know better. + */ + if (type != IRQ_TYPE_NONE) { + __irq_set_trigger(desc, type); + desc->handle_irq = handle; + } + + irq_settings_set_noprobe(desc); + irq_settings_set_norequest(desc); + irq_settings_set_nothread(desc); + desc->action = &chained_action; + WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc))); + irq_activate_and_startup(desc, IRQ_RESEND); + } +} + +void +__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); + + if (!desc) + return; + + __irq_do_set_handler(desc, handle, is_chained, name); + irq_put_desc_busunlock(desc, flags); +} +EXPORT_SYMBOL_GPL(__irq_set_handler); + +void +irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, + void *data) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); + + if (!desc) + return; + + desc->irq_common_data.handler_data = data; + __irq_do_set_handler(desc, handle, 1, NULL); + + irq_put_desc_busunlock(desc, flags); +} +EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); + +void +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, + irq_flow_handler_t handle, const char *name) +{ + irq_set_chip(irq, chip); + __irq_set_handler(irq, handle, 0, name); +} +EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); + +void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) +{ + unsigned long flags, trigger, tmp; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + + if (!desc) + return; + + /* + * Warn when a driver sets the no autoenable flag on an already + * active interrupt. + */ + WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); + + irq_settings_clr_and_set(desc, clr, set); + + trigger = irqd_get_trigger_type(&desc->irq_data); + + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | + IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); + if (irq_settings_has_no_balance_set(desc)) + irqd_set(&desc->irq_data, IRQD_NO_BALANCING); + if (irq_settings_is_per_cpu(desc)) + irqd_set(&desc->irq_data, IRQD_PER_CPU); + if (irq_settings_can_move_pcntxt(desc)) + irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); + if (irq_settings_is_level(desc)) + irqd_set(&desc->irq_data, IRQD_LEVEL); + + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; + + irqd_set(&desc->irq_data, trigger); + + irq_put_desc_unlock(desc, flags); +} +EXPORT_SYMBOL_GPL(irq_modify_status); + +#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE +/** + * irq_cpu_online - Invoke all irq_cpu_online functions. + * + * Iterate through all irqs and invoke the chip.irq_cpu_online() + * for each. + */ +void irq_cpu_online(void) +{ + struct irq_desc *desc; + struct irq_chip *chip; + unsigned long flags; + unsigned int irq; + + for_each_active_irq(irq) { + desc = irq_to_desc(irq); + if (!desc) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + + chip = irq_data_get_irq_chip(&desc->irq_data); + if (chip && chip->irq_cpu_online && + (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || + !irqd_irq_disabled(&desc->irq_data))) + chip->irq_cpu_online(&desc->irq_data); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + } +} + +/** + * irq_cpu_offline - Invoke all irq_cpu_offline functions. + * + * Iterate through all irqs and invoke the chip.irq_cpu_offline() + * for each. + */ +void irq_cpu_offline(void) +{ + struct irq_desc *desc; + struct irq_chip *chip; + unsigned long flags; + unsigned int irq; + + for_each_active_irq(irq) { + desc = irq_to_desc(irq); + if (!desc) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + + chip = irq_data_get_irq_chip(&desc->irq_data); + if (chip && chip->irq_cpu_offline && + (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || + !irqd_irq_disabled(&desc->irq_data))) + chip->irq_cpu_offline(&desc->irq_data); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + } +} +#endif + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + +#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS +/** + * handle_fasteoi_ack_irq - irq handler for edge hierarchy + * stacked on transparent controllers + * + * @desc: the interrupt description structure for this irq + * + * Like handle_fasteoi_irq(), but for use with hierarchy where + * the irq_chip also needs to have its ->irq_ack() function + * called. + */ +void handle_fasteoi_ack_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + raw_spin_lock(&desc->lock); + + if (!irq_may_run(desc)) + goto out; + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + /* + * If its disabled or no action available + * then mask it and get out of here: + */ + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + mask_irq(desc); + goto out; + } + + kstat_incr_irqs_this_cpu(desc); + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); + + /* Start handling the irq */ + desc->irq_data.chip->irq_ack(&desc->irq_data); + + handle_irq_event(desc); + + cond_unmask_eoi_irq(desc, chip); + + raw_spin_unlock(&desc->lock); + return; +out: + if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) + chip->irq_eoi(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); + +/** + * handle_fasteoi_mask_irq - irq handler for level hierarchy + * stacked on transparent controllers + * + * @desc: the interrupt description structure for this irq + * + * Like handle_fasteoi_irq(), but for use with hierarchy where + * the irq_chip also needs to have its ->irq_mask_ack() function + * called. + */ +void handle_fasteoi_mask_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + raw_spin_lock(&desc->lock); + mask_ack_irq(desc); + + if (!irq_may_run(desc)) + goto out; + + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + /* + * If its disabled or no action available + * then mask it and get out of here: + */ + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + mask_irq(desc); + goto out; + } + + kstat_incr_irqs_this_cpu(desc); + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); + + handle_irq_event(desc); + + cond_unmask_eoi_irq(desc, chip); + + raw_spin_unlock(&desc->lock); + return; +out: + if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) + chip->irq_eoi(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); + +#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ + +/** + * irq_chip_set_parent_state - set the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_set_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool val) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_set_irqchip_state) + return 0; + + return data->chip->irq_set_irqchip_state(data, which, val); +} +EXPORT_SYMBOL_GPL(irq_chip_set_parent_state); + +/** + * irq_chip_get_parent_state - get the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: one of IRQCHIP_STATE_* the caller wants to know + * @state: a pointer to a boolean where the state is to be stored + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_get_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_get_irqchip_state) + return 0; + + return data->chip->irq_get_irqchip_state(data, which, state); +} +EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); + +/** + * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if + * NULL) + * @data: Pointer to interrupt specific data + */ +void irq_chip_enable_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_enable) + data->chip->irq_enable(data); + else + data->chip->irq_unmask(data); +} +EXPORT_SYMBOL_GPL(irq_chip_enable_parent); + +/** + * irq_chip_disable_parent - Disable the parent interrupt (defaults to mask if + * NULL) + * @data: Pointer to interrupt specific data + */ +void irq_chip_disable_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_disable) + data->chip->irq_disable(data); + else + data->chip->irq_mask(data); +} +EXPORT_SYMBOL_GPL(irq_chip_disable_parent); + +/** + * irq_chip_ack_parent - Acknowledge the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_ack_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_ack(data); +} +EXPORT_SYMBOL_GPL(irq_chip_ack_parent); + +/** + * irq_chip_mask_parent - Mask the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_mask_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_mask(data); +} +EXPORT_SYMBOL_GPL(irq_chip_mask_parent); + +/** + * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_mask_ack_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_mask_ack(data); +} +EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent); + +/** + * irq_chip_unmask_parent - Unmask the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_unmask_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_unmask(data); +} +EXPORT_SYMBOL_GPL(irq_chip_unmask_parent); + +/** + * irq_chip_eoi_parent - Invoke EOI on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_eoi_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_eoi(data); +} +EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); + +/** + * irq_chip_set_affinity_parent - Set affinity on the parent interrupt + * @data: Pointer to interrupt specific data + * @dest: The affinity mask to set + * @force: Flag to enforce setting (disable online checks) + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_affinity_parent(struct irq_data *data, + const struct cpumask *dest, bool force) +{ + data = data->parent_data; + if (data->chip->irq_set_affinity) + return data->chip->irq_set_affinity(data, dest, force); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_set_affinity_parent); + +/** + * irq_chip_set_type_parent - Set IRQ type on the parent interrupt + * @data: Pointer to interrupt specific data + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) +{ + data = data->parent_data; + + if (data->chip->irq_set_type) + return data->chip->irq_set_type(data, type); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_set_type_parent); + +/** + * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware + * @data: Pointer to interrupt specific data + * + * Iterate through the domain hierarchy of the interrupt and check + * whether a hw retrigger function exists. If yes, invoke it. + */ +int irq_chip_retrigger_hierarchy(struct irq_data *data) +{ + for (data = data->parent_data; data; data = data->parent_data) + if (data->chip && data->chip->irq_retrigger) + return data->chip->irq_retrigger(data); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_retrigger_hierarchy); + +/** + * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt + * @data: Pointer to interrupt specific data + * @vcpu_info: The vcpu affinity information + */ +int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) +{ + data = data->parent_data; + if (data->chip->irq_set_vcpu_affinity) + return data->chip->irq_set_vcpu_affinity(data, vcpu_info); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_set_vcpu_affinity_parent); +/** + * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt + * @data: Pointer to interrupt specific data + * @on: Whether to set or reset the wake-up capability of this irq + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) +{ + data = data->parent_data; + + if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + + if (data->chip->irq_set_wake) + return data->chip->irq_set_wake(data, on); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); + +/** + * irq_chip_request_resources_parent - Request resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +int irq_chip_request_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + + if (data->chip->irq_request_resources) + return data->chip->irq_request_resources(data); + + /* no error on missing optional irq_chip::irq_request_resources */ + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); + +/** + * irq_chip_release_resources_parent - Release resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_release_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_release_resources) + data->chip->irq_release_resources(data); +} +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); +#endif + +/** + * irq_chip_compose_msi_msg - Compose msi message for a irq chip + * @data: Pointer to interrupt specific data + * @msg: Pointer to the MSI message + * + * For hierarchical domains we find the first chip in the hierarchy + * which implements the irq_compose_msi_msg callback. For non + * hierarchical we use the top level chip. + */ +int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct irq_data *pos; + + for (pos = NULL; !pos && data; data = irqd_get_parent_data(data)) { + if (data->chip && data->chip->irq_compose_msi_msg) + pos = data; + } + + if (!pos) + return -ENOSYS; + + pos->chip->irq_compose_msi_msg(pos, msg); + return 0; +} + +static struct device *irq_get_pm_device(struct irq_data *data) +{ + if (data->domain) + return data->domain->pm_dev; + + return NULL; +} + +/** + * irq_chip_pm_get - Enable power for an IRQ chip + * @data: Pointer to interrupt specific data + * + * Enable the power to the IRQ chip referenced by the interrupt data + * structure. + */ +int irq_chip_pm_get(struct irq_data *data) +{ + struct device *dev = irq_get_pm_device(data); + int retval = 0; + + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_resume_and_get(dev); + + return retval; +} + +/** + * irq_chip_pm_put - Disable power for an IRQ chip + * @data: Pointer to interrupt specific data + * + * Disable the power to the IRQ chip referenced by the interrupt data + * structure, belongs. Note that power will only be disabled, once this + * function has been called for all IRQs that have called irq_chip_pm_get(). + */ +int irq_chip_pm_put(struct irq_data *data) +{ + struct device *dev = irq_get_pm_device(data); + int retval = 0; + + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_put(dev); + + return (retval < 0) ? retval : 0; +} diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c new file mode 100644 index 0000000000..1ed2b17393 --- /dev/null +++ b/kernel/irq/cpuhotplug.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic cpu hotunplug interrupt migration code copied from the + * arch/arm implementation + * + * Copyright (C) Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/interrupt.h> +#include <linux/ratelimit.h> +#include <linux/irq.h> +#include <linux/sched/isolation.h> + +#include "internals.h" + +/* For !GENERIC_IRQ_EFFECTIVE_AFF_MASK this looks at general affinity mask */ +static inline bool irq_needs_fixup(struct irq_data *d) +{ + const struct cpumask *m = irq_data_get_effective_affinity_mask(d); + unsigned int cpu = smp_processor_id(); + +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + /* + * The cpumask_empty() check is a workaround for interrupt chips, + * which do not implement effective affinity, but the architecture has + * enabled the config switch. Use the general affinity mask instead. + */ + if (cpumask_empty(m)) + m = irq_data_get_affinity_mask(d); + + /* + * Sanity check. If the mask is not empty when excluding the outgoing + * CPU then it must contain at least one online CPU. The outgoing CPU + * has been removed from the online mask already. + */ + if (cpumask_any_but(m, cpu) < nr_cpu_ids && + cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) { + /* + * If this happens then there was a missed IRQ fixup at some + * point. Warn about it and enforce fixup. + */ + pr_warn("Eff. affinity %*pbl of IRQ %u contains only offline CPUs after offlining CPU %u\n", + cpumask_pr_args(m), d->irq, cpu); + return true; + } +#endif + return cpumask_test_cpu(cpu, m); +} + +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(d); + bool maskchip = !irq_can_move_pcntxt(d) && !irqd_irq_masked(d); + const struct cpumask *affinity; + bool brokeaff = false; + int err; + + /* + * IRQ chip might be already torn down, but the irq descriptor is + * still in the radix tree. Also if the chip has no affinity setter, + * nothing can be done here. + */ + if (!chip || !chip->irq_set_affinity) { + pr_debug("IRQ %u: Unable to migrate away\n", d->irq); + return false; + } + + /* + * No move required, if: + * - Interrupt is per cpu + * - Interrupt is not started + * - Affinity mask does not include this CPU. + * + * Note: Do not check desc->action as this might be a chained + * interrupt. + */ + if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !irq_needs_fixup(d)) { + /* + * If an irq move is pending, abort it if the dying CPU is + * the sole target. + */ + irq_fixup_move_pending(desc, false); + return false; + } + + /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + + /* + * If there is a setaffinity pending, then try to reuse the pending + * mask, so the last change of the affinity does not get lost. If + * there is no move pending or the pending mask does not contain + * any online CPU, use the current affinity mask. + */ + if (irq_fixup_move_pending(desc, true)) + affinity = irq_desc_get_pending_mask(desc); + else + affinity = irq_data_get_affinity_mask(d); + + /* Mask the chip for interrupts which cannot move in process context */ + if (maskchip && chip->irq_mask) + chip->irq_mask(d); + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + /* + * If the interrupt is managed, then shut it down and leave + * the affinity untouched. + */ + if (irqd_affinity_is_managed(d)) { + irqd_set_managed_shutdown(d); + irq_shutdown_and_deactivate(desc); + return false; + } + affinity = cpu_online_mask; + brokeaff = true; + } + /* + * Do not set the force argument of irq_do_set_affinity() as this + * disables the masking of offline CPUs from the supplied affinity + * mask and therefore might keep/reassign the irq to the outgoing + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); + if (err) { + pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", + d->irq, err); + brokeaff = false; + } + + if (maskchip && chip->irq_unmask) + chip->irq_unmask(d); + + return brokeaff; +} + +/** + * irq_migrate_all_off_this_cpu - Migrate irqs away from offline cpu + * + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void irq_migrate_all_off_this_cpu(void) +{ + struct irq_desc *desc; + unsigned int irq; + + for_each_active_irq(irq) { + bool affinity_broken; + + desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) { + pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n", + irq, smp_processor_id()); + } + } +} + +static bool hk_should_isolate(struct irq_data *data, unsigned int cpu) +{ + const struct cpumask *hk_mask; + + if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) + return false; + + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); + if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask)) + return false; + + return cpumask_test_cpu(cpu, hk_mask); +} + +static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = irq_data_get_affinity_mask(data); + + if (!irqd_affinity_is_managed(data) || !desc->action || + !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) + return; + + if (irqd_is_managed_and_shutdown(data)) { + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + return; + } + + /* + * If the interrupt can only be directed to a single target + * CPU then it is already assigned to a CPU in the affinity + * mask. No point in trying to move it around unless the + * isolation mechanism requests to move it to an upcoming + * housekeeping CPU. + */ + if (!irqd_is_single_target(data) || hk_should_isolate(data, cpu)) + irq_set_affinity_locked(data, affinity, false); +} + +/** + * irq_affinity_online_cpu - Restore affinity for managed interrupts + * @cpu: Upcoming CPU for which interrupts should be restored + */ +int irq_affinity_online_cpu(unsigned int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + + irq_lock_sparse(); + for_each_active_irq(irq) { + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + irq_restore_affinity_of_irq(desc, cpu); + raw_spin_unlock_irq(&desc->lock); + } + irq_unlock_sparse(); + + return 0; +} diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h new file mode 100644 index 0000000000..8ccb326d29 --- /dev/null +++ b/kernel/irq/debug.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Debugging printout: + */ + +#define ___P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f) +#define ___PS(f) if (desc->istate & f) printk("%14s set\n", #f) +/* FIXME */ +#define ___PD(f) do { } while (0) + +static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) +{ + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + if (!__ratelimit(&ratelimit)) + return; + + printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", + irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); + printk("->handle_irq(): %p, %pS\n", + desc->handle_irq, desc->handle_irq); + printk("->irq_data.chip(): %p, %pS\n", + desc->irq_data.chip, desc->irq_data.chip); + printk("->action(): %p\n", desc->action); + if (desc->action) { + printk("->action->handler(): %p, %pS\n", + desc->action->handler, desc->action->handler); + } + + ___P(IRQ_LEVEL); + ___P(IRQ_PER_CPU); + ___P(IRQ_NOPROBE); + ___P(IRQ_NOREQUEST); + ___P(IRQ_NOTHREAD); + ___P(IRQ_NOAUTOEN); + + ___PS(IRQS_AUTODETECT); + ___PS(IRQS_REPLAY); + ___PS(IRQS_WAITING); + ___PS(IRQS_PENDING); + + ___PD(IRQS_INPROGRESS); + ___PD(IRQS_DISABLED); + ___PD(IRQS_MASKED); +} + +#undef ___P +#undef ___PS +#undef ___PD diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c new file mode 100644 index 0000000000..aae0402507 --- /dev/null +++ b/kernel/irq/debugfs.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright 2017 Thomas Gleixner <tglx@linutronix.de> + +#include <linux/irqdomain.h> +#include <linux/irq.h> +#include <linux/uaccess.h> + +#include "internals.h" + +static struct dentry *irq_dir; + +struct irq_bit_descr { + unsigned int mask; + char *name; +}; +#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } + +static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size) +{ + int i; + + for (i = 0; i < size; i++, sd++) { + if (state & sd->mask) + seq_printf(m, "%*s%s\n", ind + 12, "", sd->name); + } +} + +#ifdef CONFIG_SMP +static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + const struct cpumask *msk; + + msk = irq_data_get_affinity_mask(data); + seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk)); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + msk = irq_data_get_effective_affinity_mask(data); + seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk)); +#endif +#ifdef CONFIG_GENERIC_PENDING_IRQ + msk = desc->pending_mask; + seq_printf(m, "pending: %*pbl\n", cpumask_pr_args(msk)); +#endif +} +#else +static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { } +#endif + +static const struct irq_bit_descr irqchip_flags[] = { + BIT_MASK_DESCR(IRQCHIP_SET_TYPE_MASKED), + BIT_MASK_DESCR(IRQCHIP_EOI_IF_HANDLED), + BIT_MASK_DESCR(IRQCHIP_MASK_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_ONOFFLINE_ENABLED), + BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE), + BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), + BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), + BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_IMMUTABLE), +}; + +static void +irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) +{ + struct irq_chip *chip = data->chip; + + if (!chip) { + seq_printf(m, "chip: None\n"); + return; + } + seq_printf(m, "%*schip: ", ind, ""); + if (chip->irq_print_chip) + chip->irq_print_chip(data, m); + else + seq_printf(m, "%s", chip->name); + seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, + ARRAY_SIZE(irqchip_flags)); +} + +static void +irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind) +{ + seq_printf(m, "%*sdomain: %s\n", ind, "", + data->domain ? data->domain->name : ""); + seq_printf(m, "%*shwirq: 0x%lx\n", ind + 1, "", data->hwirq); + irq_debug_show_chip(m, data, ind + 1); + if (data->domain && data->domain->ops && data->domain->ops->debug_show) + data->domain->ops->debug_show(m, NULL, data, ind + 1); +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (!data->parent_data) + return; + seq_printf(m, "%*sparent:\n", ind + 1, ""); + irq_debug_show_data(m, data->parent_data, ind + 4); +#endif +} + +static const struct irq_bit_descr irqdata_states[] = { + BIT_MASK_DESCR(IRQ_TYPE_EDGE_RISING), + BIT_MASK_DESCR(IRQ_TYPE_EDGE_FALLING), + BIT_MASK_DESCR(IRQ_TYPE_LEVEL_HIGH), + BIT_MASK_DESCR(IRQ_TYPE_LEVEL_LOW), + BIT_MASK_DESCR(IRQD_LEVEL), + + BIT_MASK_DESCR(IRQD_ACTIVATED), + BIT_MASK_DESCR(IRQD_IRQ_STARTED), + BIT_MASK_DESCR(IRQD_IRQ_DISABLED), + BIT_MASK_DESCR(IRQD_IRQ_MASKED), + BIT_MASK_DESCR(IRQD_IRQ_INPROGRESS), + + BIT_MASK_DESCR(IRQD_PER_CPU), + BIT_MASK_DESCR(IRQD_NO_BALANCING), + + BIT_MASK_DESCR(IRQD_SINGLE_TARGET), + BIT_MASK_DESCR(IRQD_MOVE_PCNTXT), + BIT_MASK_DESCR(IRQD_AFFINITY_SET), + BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), + BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), + BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), + BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), + + BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), + + BIT_MASK_DESCR(IRQD_WAKEUP_STATE), + BIT_MASK_DESCR(IRQD_WAKEUP_ARMED), + + BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET), + + BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), + + BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), + + BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS), +}; + +static const struct irq_bit_descr irqdesc_states[] = { + BIT_MASK_DESCR(_IRQ_NOPROBE), + BIT_MASK_DESCR(_IRQ_NOREQUEST), + BIT_MASK_DESCR(_IRQ_NOTHREAD), + BIT_MASK_DESCR(_IRQ_NOAUTOEN), + BIT_MASK_DESCR(_IRQ_NESTED_THREAD), + BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID), + BIT_MASK_DESCR(_IRQ_IS_POLLED), + BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY), + BIT_MASK_DESCR(_IRQ_HIDDEN), +}; + +static const struct irq_bit_descr irqdesc_istates[] = { + BIT_MASK_DESCR(IRQS_AUTODETECT), + BIT_MASK_DESCR(IRQS_SPURIOUS_DISABLED), + BIT_MASK_DESCR(IRQS_POLL_INPROGRESS), + BIT_MASK_DESCR(IRQS_ONESHOT), + BIT_MASK_DESCR(IRQS_REPLAY), + BIT_MASK_DESCR(IRQS_WAITING), + BIT_MASK_DESCR(IRQS_PENDING), + BIT_MASK_DESCR(IRQS_SUSPENDED), + BIT_MASK_DESCR(IRQS_NMI), +}; + + +static int irq_debug_show(struct seq_file *m, void *p) +{ + struct irq_desc *desc = m->private; + struct irq_data *data; + + raw_spin_lock_irq(&desc->lock); + data = irq_desc_get_irq_data(desc); + seq_printf(m, "handler: %ps\n", desc->handle_irq); + seq_printf(m, "device: %s\n", desc->dev_name); + seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors); + irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states, + ARRAY_SIZE(irqdesc_states)); + seq_printf(m, "istate: 0x%08x\n", desc->istate); + irq_debug_show_bits(m, 0, desc->istate, irqdesc_istates, + ARRAY_SIZE(irqdesc_istates)); + seq_printf(m, "ddepth: %u\n", desc->depth); + seq_printf(m, "wdepth: %u\n", desc->wake_depth); + seq_printf(m, "dstate: 0x%08x\n", irqd_get(data)); + irq_debug_show_bits(m, 0, irqd_get(data), irqdata_states, + ARRAY_SIZE(irqdata_states)); + seq_printf(m, "node: %d\n", irq_data_get_node(data)); + irq_debug_show_masks(m, desc); + irq_debug_show_data(m, data, 0); + raw_spin_unlock_irq(&desc->lock); + return 0; +} + +static int irq_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_debug_show, inode->i_private); +} + +static ssize_t irq_debug_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct irq_desc *desc = file_inode(file)->i_private; + char buf[8] = { 0, }; + size_t size; + + size = min(sizeof(buf) - 1, count); + if (copy_from_user(buf, user_buf, size)) + return -EFAULT; + + if (!strncmp(buf, "trigger", size)) { + int err = irq_inject_interrupt(irq_desc_get_irq(desc)); + + return err ? err : count; + } + + return count; +} + +static const struct file_operations dfs_irq_ops = { + .open = irq_debug_open, + .write = irq_debug_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void irq_debugfs_copy_devname(int irq, struct device *dev) +{ + struct irq_desc *desc = irq_to_desc(irq); + const char *name = dev_name(dev); + + if (name) + desc->dev_name = kstrdup(name, GFP_KERNEL); +} + +void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc) +{ + char name [10]; + + if (!irq_dir || !desc || desc->debugfs_file) + return; + + sprintf(name, "%d", irq); + desc->debugfs_file = debugfs_create_file(name, 0644, irq_dir, desc, + &dfs_irq_ops); +} + +static int __init irq_debugfs_init(void) +{ + struct dentry *root_dir; + int irq; + + root_dir = debugfs_create_dir("irq", NULL); + + irq_domain_debugfs_init(root_dir); + + irq_dir = debugfs_create_dir("irqs", root_dir); + + irq_lock_sparse(); + for_each_active_irq(irq) + irq_add_debugfs_entry(irq, irq_to_desc(irq)); + irq_unlock_sparse(); + + return 0; +} +__initcall(irq_debugfs_init); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c new file mode 100644 index 0000000000..f6e5515ee0 --- /dev/null +++ b/kernel/irq/devres.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/gfp.h> +#include <linux/irq.h> + +#include "internals.h" + +/* + * Device resource management aware IRQ request/free implementation. + */ +struct irq_devres { + unsigned int irq; + void *dev_id; +}; + +static void devm_irq_release(struct device *dev, void *res) +{ + struct irq_devres *this = res; + + free_irq(this->irq, this->dev_id); +} + +static int devm_irq_match(struct device *dev, void *res, void *data) +{ + struct irq_devres *this = res, *match = data; + + return this->irq == match->irq && this->dev_id == match->dev_id; +} + +/** + * devm_request_threaded_irq - allocate an interrupt line for a managed device + * @dev: device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @thread_fn: function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the + * same arguments and performs the same function as + * request_threaded_irq(). IRQs requested with this function will be + * automatically freed on driver detach. + * + * If an IRQ allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + */ +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + struct irq_devres *dr; + int rc; + + dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres), + GFP_KERNEL); + if (!dr) + return -ENOMEM; + + if (!devname) + devname = dev_name(dev); + + rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, + dev_id); + if (rc) { + devres_free(dr); + return rc; + } + + dr->irq = irq; + dr->dev_id = dev_id; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL(devm_request_threaded_irq); + +/** + * devm_request_any_context_irq - allocate an interrupt line for a managed device + * @dev: device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the + * same arguments and performs the same function as + * request_any_context_irq(). IRQs requested with this function will be + * automatically freed on driver detach. + * + * If an IRQ allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + */ +int devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + struct irq_devres *dr; + int rc; + + dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres), + GFP_KERNEL); + if (!dr) + return -ENOMEM; + + if (!devname) + devname = dev_name(dev); + + rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); + if (rc < 0) { + devres_free(dr); + return rc; + } + + dr->irq = irq; + dr->dev_id = dev_id; + devres_add(dev, dr); + + return rc; +} +EXPORT_SYMBOL(devm_request_any_context_irq); + +/** + * devm_free_irq - free an interrupt + * @dev: device to free interrupt for + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Except for the extra @dev argument, this function takes the + * same arguments and performs the same function as free_irq(). + * This function instead of free_irq() should be used to manually + * free IRQs allocated with devm_request_irq(). + */ +void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) +{ + struct irq_devres match_data = { irq, dev_id }; + + WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, + &match_data)); + free_irq(irq, dev_id); +} +EXPORT_SYMBOL(devm_free_irq); + +struct irq_desc_devres { + unsigned int from; + unsigned int cnt; +}; + +static void devm_irq_desc_release(struct device *dev, void *res) +{ + struct irq_desc_devres *this = res; + + irq_free_descs(this->from, this->cnt); +} + +/** + * __devm_irq_alloc_descs - Allocate and initialize a range of irq descriptors + * for a managed device + * @dev: Device to allocate the descriptors for + * @irq: Allocate for specific irq number if irq >= 0 + * @from: Start the search from this irq number + * @cnt: Number of consecutive irqs to allocate + * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an irq_affinity_desc array of size @cnt + * which hints where the irq descriptors should be allocated + * and which default affinities to use + * + * Returns the first irq number or error code. + * + * Note: Use the provided wrappers (devm_irq_alloc_desc*) for simplicity. + */ +int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, + unsigned int cnt, int node, struct module *owner, + const struct irq_affinity_desc *affinity) +{ + struct irq_desc_devres *dr; + int base; + + dr = devres_alloc(devm_irq_desc_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + base = __irq_alloc_descs(irq, from, cnt, node, owner, affinity); + if (base < 0) { + devres_free(dr); + return base; + } + + dr->from = base; + dr->cnt = cnt; + devres_add(dev, dr); + + return base; +} +EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs); + +#ifdef CONFIG_GENERIC_IRQ_CHIP +/** + * devm_irq_alloc_generic_chip - Allocate and initialize a generic chip + * for a managed device + * @dev: Device to allocate the generic chip for + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, + unsigned int irq_base, void __iomem *reg_base, + irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + + gc = devm_kzalloc(dev, struct_size(gc, chip_types, num_ct), GFP_KERNEL); + if (gc) + irq_init_generic_chip(gc, name, num_ct, + irq_base, reg_base, handler); + + return gc; +} +EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip); + +struct irq_generic_chip_devres { + struct irq_chip_generic *gc; + u32 msk; + unsigned int clr; + unsigned int set; +}; + +static void devm_irq_remove_generic_chip(struct device *dev, void *res) +{ + struct irq_generic_chip_devres *this = res; + + irq_remove_generic_chip(this->gc, this->msk, this->clr, this->set); +} + +/** + * devm_irq_setup_generic_chip - Setup a range of interrupts with a generic + * chip for a managed device + * + * @dev: Device to setup the generic chip for + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. + */ +int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, + u32 msk, enum irq_gc_flags flags, + unsigned int clr, unsigned int set) +{ + struct irq_generic_chip_devres *dr; + + dr = devres_alloc(devm_irq_remove_generic_chip, + sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + irq_setup_generic_chip(gc, msk, flags, clr, set); + + dr->gc = gc; + dr->msk = msk; + dr->clr = clr; + dr->set = set; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip); +#endif /* CONFIG_GENERIC_IRQ_CHIP */ diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c new file mode 100644 index 0000000000..7fe6cffe7d --- /dev/null +++ b/kernel/irq/dummychip.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the dummy interrupt chip implementation + */ +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/export.h> + +#include "internals.h" + +/* + * What should we do if we get a hw irq event on an illegal vector? + * Each architecture has to answer this themselves. + */ +static void ack_bad(struct irq_data *data) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + print_irq_desc(data->irq, desc); + ack_bad_irq(data->irq); +} + +/* + * NOP functions + */ +static void noop(struct irq_data *data) { } + +static unsigned int noop_ret(struct irq_data *data) +{ + return 0; +} + +/* + * Generic no controller implementation + */ +struct irq_chip no_irq_chip = { + .name = "none", + .irq_startup = noop_ret, + .irq_shutdown = noop, + .irq_enable = noop, + .irq_disable = noop, + .irq_ack = ack_bad, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +/* + * Generic dummy implementation which can be used for + * real dumb interrupt sources + */ +struct irq_chip dummy_irq_chip = { + .name = "dummy", + .irq_startup = noop_ret, + .irq_shutdown = noop, + .irq_enable = noop, + .irq_disable = noop, + .irq_ack = noop, + .irq_mask = noop, + .irq_unmask = noop, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; +EXPORT_SYMBOL_GPL(dummy_irq_chip); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c new file mode 100644 index 0000000000..5a452b94b6 --- /dev/null +++ b/kernel/irq/generic-chip.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Library implementing the most common irq chip callback functions + * + * Copyright (C) 2011, Thomas Gleixner + */ +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/syscore_ops.h> + +#include "internals.h" + +static LIST_HEAD(gc_list); +static DEFINE_RAW_SPINLOCK(gc_lock); + +/** + * irq_gc_noop - NOOP function + * @d: irq_data + */ +void irq_gc_noop(struct irq_data *d) +{ +} +EXPORT_SYMBOL_GPL(irq_gc_noop); + +/** + * irq_gc_mask_disable_reg - Mask chip via disable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_mask_disable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_gc_unlock(gc); +} +EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg); + +/** + * irq_gc_mask_set_bit - Mask chip via setting bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + *ct->mask_cache |= mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); + irq_gc_unlock(gc); +} +EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); + +/** + * irq_gc_mask_clr_bit - Mask chip via clearing bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); + irq_gc_unlock(gc); +} +EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); + +/** + * irq_gc_unmask_enable_reg - Unmask chip via enable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_unmask_enable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.enable); + *ct->mask_cache |= mask; + irq_gc_unlock(gc); +} +EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg); + +/** + * irq_gc_ack_set_bit - Ack pending interrupt via setting bit + * @d: irq_data + */ +void irq_gc_ack_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} +EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); + +/** + * irq_gc_ack_clr_bit - Ack pending interrupt via clearing bit + * @d: irq_data + */ +void irq_gc_ack_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = ~d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt + * @d: irq_data + * + * This generic implementation of the irq_mask_ack method is for chips + * with separate enable/disable registers instead of a single mask + * register and where a pending interrupt is acknowledged by setting a + * bit. + * + * Note: This is the only permutation currently used. Similar generic + * functions should be added here if other permutations are required. + */ +void irq_gc_mask_disable_and_ack_set(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_eoi - EOI interrupt + * @d: irq_data + */ +void irq_gc_eoi(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.eoi); + irq_gc_unlock(gc); +} + +/** + * irq_gc_set_wake - Set/clr wake bit for an interrupt + * @d: irq_data + * @on: Indicates whether the wake bit should be set or cleared + * + * For chips where the wake from suspend functionality is not + * configured in a separate register and the wakeup active state is + * just stored in a bitmask. + */ +int irq_gc_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = d->mask; + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + + irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; + irq_gc_unlock(gc); + return 0; +} +EXPORT_SYMBOL_GPL(irq_gc_set_wake); + +static u32 irq_readl_be(void __iomem *addr) +{ + return ioread32be(addr); +} + +static void irq_writel_be(u32 val, void __iomem *addr) +{ + iowrite32be(val, addr); +} + +void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + raw_spin_lock_init(&gc->lock); + gc->num_ct = num_ct; + gc->irq_base = irq_base; + gc->reg_base = reg_base; + gc->chip_types->chip.name = name; + gc->chip_types->handler = handler; +} + +/** + * irq_alloc_generic_chip - Allocate a generic chip and initialize it + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + + gc = kzalloc(struct_size(gc, chip_types, num_ct), GFP_KERNEL); + if (gc) { + irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base, + handler); + } + return gc; +} +EXPORT_SYMBOL_GPL(irq_alloc_generic_chip); + +static void +irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) +{ + struct irq_chip_type *ct = gc->chip_types; + u32 *mskptr = &gc->mask_cache, mskreg = ct->regs.mask; + int i; + + for (i = 0; i < gc->num_ct; i++) { + if (flags & IRQ_GC_MASK_CACHE_PER_TYPE) { + mskptr = &ct[i].mask_cache_priv; + mskreg = ct[i].regs.mask; + } + ct[i].mask_cache = mskptr; + if (flags & IRQ_GC_INIT_MASK_CACHE) + *mskptr = irq_reg_readl(gc, mskreg); + } +} + +/** + * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with this + * @name: Name of the irq chip + * @handler: Default flow handler associated with these chips + * @clr: IRQ_* bits to clear in the mapping function + * @set: IRQ_* bits to set in the mapping function + * @gcflags: Generic chip specific setup flags + */ +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags gcflags) +{ + struct irq_domain_chip_generic *dgc; + struct irq_chip_generic *gc; + unsigned long flags; + int numchips, i; + size_t dgc_sz; + size_t gc_sz; + size_t sz; + void *tmp; + + if (d->gc) + return -EBUSY; + + numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); + if (!numchips) + return -EINVAL; + + /* Allocate a pointer, generic chip and chiptypes for each chip */ + gc_sz = struct_size(gc, chip_types, num_ct); + dgc_sz = struct_size(dgc, gc, numchips); + sz = dgc_sz + numchips * gc_sz; + + tmp = dgc = kzalloc(sz, GFP_KERNEL); + if (!dgc) + return -ENOMEM; + dgc->irqs_per_chip = irqs_per_chip; + dgc->num_chips = numchips; + dgc->irq_flags_to_set = set; + dgc->irq_flags_to_clear = clr; + dgc->gc_flags = gcflags; + d->gc = dgc; + + /* Calc pointer to the first generic chip */ + tmp += dgc_sz; + for (i = 0; i < numchips; i++) { + /* Store the pointer to the generic chip */ + dgc->gc[i] = gc = tmp; + irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, + NULL, handler); + + gc->domain = d; + if (gcflags & IRQ_GC_BE_IO) { + gc->reg_readl = &irq_readl_be; + gc->reg_writel = &irq_writel_be; + } + + raw_spin_lock_irqsave(&gc_lock, flags); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock_irqrestore(&gc_lock, flags); + /* Calc pointer to the next generic chip */ + tmp += gc_sz; + } + return 0; +} +EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); + +static struct irq_chip_generic * +__irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) +{ + struct irq_domain_chip_generic *dgc = d->gc; + int idx; + + if (!dgc) + return ERR_PTR(-ENODEV); + idx = hw_irq / dgc->irqs_per_chip; + if (idx >= dgc->num_chips) + return ERR_PTR(-EINVAL); + return dgc->gc[idx]; +} + +/** + * irq_get_domain_generic_chip - Get a pointer to the generic chip of a hw_irq + * @d: irq domain pointer + * @hw_irq: Hardware interrupt number + */ +struct irq_chip_generic * +irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) +{ + struct irq_chip_generic *gc = __irq_get_domain_generic_chip(d, hw_irq); + + return !IS_ERR(gc) ? gc : NULL; +} +EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); + +/* + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. + */ +static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; + +/* + * irq_map_generic_chip - Map a generic chip for an irq domain + */ +int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw_irq) +{ + struct irq_data *data = irq_domain_get_irq_data(d, virq); + struct irq_domain_chip_generic *dgc = d->gc; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; + struct irq_chip *chip; + unsigned long flags; + int idx; + + gc = __irq_get_domain_generic_chip(d, hw_irq); + if (IS_ERR(gc)) + return PTR_ERR(gc); + + idx = hw_irq % dgc->irqs_per_chip; + + if (test_bit(idx, &gc->unused)) + return -ENOTSUPP; + + if (test_bit(idx, &gc->installed)) + return -EBUSY; + + ct = gc->chip_types; + chip = &ct->chip; + + /* We only init the cache for the first mapping of a generic chip */ + if (!gc->installed) { + raw_spin_lock_irqsave(&gc->lock, flags); + irq_gc_init_mask_cache(gc, dgc->gc_flags); + raw_spin_unlock_irqrestore(&gc->lock, flags); + } + + /* Mark the interrupt as installed */ + set_bit(idx, &gc->installed); + + if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); + + if (chip->irq_calc_mask) + chip->irq_calc_mask(data); + else + data->mask = 1 << idx; + + irq_domain_set_info(d, virq, hw_irq, chip, gc, ct->handler, NULL, NULL); + irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set); + return 0; +} + +void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) +{ + struct irq_data *data = irq_domain_get_irq_data(d, virq); + struct irq_domain_chip_generic *dgc = d->gc; + unsigned int hw_irq = data->hwirq; + struct irq_chip_generic *gc; + int irq_idx; + + gc = irq_get_domain_generic_chip(d, hw_irq); + if (!gc) + return; + + irq_idx = hw_irq % dgc->irqs_per_chip; + + clear_bit(irq_idx, &gc->installed); + irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL, + NULL); + +} + +const struct irq_domain_ops irq_generic_chip_ops = { + .map = irq_map_generic_chip, + .unmap = irq_unmap_generic_chip, + .xlate = irq_domain_xlate_onetwocell, +}; +EXPORT_SYMBOL_GPL(irq_generic_chip_ops); + +/** + * irq_setup_generic_chip - Setup a range of interrupts with a generic chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. + */ +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set) +{ + struct irq_chip_type *ct = gc->chip_types; + struct irq_chip *chip = &ct->chip; + unsigned int i; + + raw_spin_lock(&gc_lock); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock(&gc_lock); + + irq_gc_init_mask_cache(gc, flags); + + for (i = gc->irq_base; msk; msk >>= 1, i++) { + if (!(msk & 0x01)) + continue; + + if (flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); + + if (!(flags & IRQ_GC_NO_MASK)) { + struct irq_data *d = irq_get_irq_data(i); + + if (chip->irq_calc_mask) + chip->irq_calc_mask(d); + else + d->mask = 1 << (i - gc->irq_base); + } + irq_set_chip_and_handler(i, chip, ct->handler); + irq_set_chip_data(i, gc); + irq_modify_status(i, clr, set); + } + gc->irq_cnt = i - gc->irq_base; +} +EXPORT_SYMBOL_GPL(irq_setup_generic_chip); + +/** + * irq_setup_alt_chip - Switch to alternative chip + * @d: irq_data for this interrupt + * @type: Flow type to be initialized + * + * Only to be called from chip->irq_set_type() callbacks. + */ +int irq_setup_alt_chip(struct irq_data *d, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + for (i = 0; i < gc->num_ct; i++, ct++) { + if (ct->type & type) { + d->chip = &ct->chip; + irq_data_to_desc(d)->handle_irq = ct->handler; + return 0; + } + } + return -EINVAL; +} +EXPORT_SYMBOL_GPL(irq_setup_alt_chip); + +/** + * irq_remove_generic_chip - Remove a chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Remove up to 32 interrupts starting from gc->irq_base. + */ +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set) +{ + unsigned int i, virq; + + raw_spin_lock(&gc_lock); + list_del(&gc->list); + raw_spin_unlock(&gc_lock); + + for (i = 0; msk; msk >>= 1, i++) { + if (!(msk & 0x01)) + continue; + + /* + * Interrupt domain based chips store the base hardware + * interrupt number in gc::irq_base. Otherwise gc::irq_base + * contains the base Linux interrupt number. + */ + if (gc->domain) { + virq = irq_find_mapping(gc->domain, gc->irq_base + i); + if (!virq) + continue; + } else { + virq = gc->irq_base + i; + } + + /* Remove handler first. That will mask the irq line */ + irq_set_handler(virq, NULL); + irq_set_chip(virq, &no_irq_chip); + irq_set_chip_data(virq, NULL); + irq_modify_status(virq, clr, set); + } +} +EXPORT_SYMBOL_GPL(irq_remove_generic_chip); + +static struct irq_data *irq_gc_get_irq_data(struct irq_chip_generic *gc) +{ + unsigned int virq; + + if (!gc->domain) + return irq_get_irq_data(gc->irq_base); + + /* + * We don't know which of the irqs has been actually + * installed. Use the first one. + */ + if (!gc->installed) + return NULL; + + virq = irq_find_mapping(gc->domain, gc->irq_base + __ffs(gc->installed)); + return virq ? irq_get_irq_data(virq) : NULL; +} + +#ifdef CONFIG_PM +static int irq_gc_suspend(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_suspend) { + struct irq_data *data = irq_gc_get_irq_data(gc); + + if (data) + ct->chip.irq_suspend(data); + } + + if (gc->suspend) + gc->suspend(gc); + } + return 0; +} + +static void irq_gc_resume(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (gc->resume) + gc->resume(gc); + + if (ct->chip.irq_resume) { + struct irq_data *data = irq_gc_get_irq_data(gc); + + if (data) + ct->chip.irq_resume(data); + } + } +} +#else +#define irq_gc_suspend NULL +#define irq_gc_resume NULL +#endif + +static void irq_gc_shutdown(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_pm_shutdown) { + struct irq_data *data = irq_gc_get_irq_data(gc); + + if (data) + ct->chip.irq_pm_shutdown(data); + } + } +} + +static struct syscore_ops irq_gc_syscore_ops = { + .suspend = irq_gc_suspend, + .resume = irq_gc_resume, + .shutdown = irq_gc_shutdown, +}; + +static int __init irq_gc_init_ops(void) +{ + register_syscore_ops(&irq_gc_syscore_ops); + return 0; +} +device_initcall(irq_gc_init_ops); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c new file mode 100644 index 0000000000..9489f93b3d --- /dev/null +++ b/kernel/irq/handle.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code. Detailed + * information is available in Documentation/core-api/genericirq.rst + * + */ + +#include <linux/irq.h> +#include <linux/random.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +#include <asm/irq_regs.h> + +#include <trace/events/irq.h> + +#include "internals.h" + +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +#endif + +/** + * handle_bad_irq - handle spurious and unhandled irqs + * @desc: description of the interrupt + * + * Handles spurious and unhandled IRQ's. It also prints a debugmessage. + */ +void handle_bad_irq(struct irq_desc *desc) +{ + unsigned int irq = irq_desc_get_irq(desc); + + print_irq_desc(irq, desc); + kstat_incr_irqs_this_cpu(desc); + ack_bad_irq(irq); +} +EXPORT_SYMBOL_GPL(handle_bad_irq); + +/* + * Special, empty irq handler: + */ +irqreturn_t no_action(int cpl, void *dev_id) +{ + return IRQ_NONE; +} +EXPORT_SYMBOL_GPL(no_action); + +static void warn_no_thread(unsigned int irq, struct irqaction *action) +{ + if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) + return; + + printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " + "but no thread function available.", irq, action->name); +} + +void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) +{ + /* + * In case the thread crashed and was killed we just pretend that + * we handled the interrupt. The hardirq handler has disabled the + * device interrupt, so no irq storm is lurking. + */ + if (action->thread->flags & PF_EXITING) + return; + + /* + * Wake up the handler thread for this action. If the + * RUNTHREAD bit is already set, nothing to do. + */ + if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + return; + + /* + * It's safe to OR the mask lockless here. We have only two + * places which write to threads_oneshot: This code and the + * irq thread. + * + * This code is the hard irq context and can never run on two + * cpus in parallel. If it ever does we have more serious + * problems than this bitmask. + * + * The irq threads of this irq which clear their "running" bit + * in threads_oneshot are serialized via desc->lock against + * each other and they are serialized against this code by + * IRQS_INPROGRESS. + * + * Hard irq handler: + * + * spin_lock(desc->lock); + * desc->state |= IRQS_INPROGRESS; + * spin_unlock(desc->lock); + * set_bit(IRQTF_RUNTHREAD, &action->thread_flags); + * desc->threads_oneshot |= mask; + * spin_lock(desc->lock); + * desc->state &= ~IRQS_INPROGRESS; + * spin_unlock(desc->lock); + * + * irq thread: + * + * again: + * spin_lock(desc->lock); + * if (desc->state & IRQS_INPROGRESS) { + * spin_unlock(desc->lock); + * while(desc->state & IRQS_INPROGRESS) + * cpu_relax(); + * goto again; + * } + * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + * desc->threads_oneshot &= ~mask; + * spin_unlock(desc->lock); + * + * So either the thread waits for us to clear IRQS_INPROGRESS + * or we are waiting in the flow handler for desc->lock to be + * released before we reach this point. The thread also checks + * IRQTF_RUNTHREAD under desc->lock. If set it leaves + * threads_oneshot untouched and runs the thread another time. + */ + desc->threads_oneshot |= action->thread_mask; + + /* + * We increment the threads_active counter in case we wake up + * the irq thread. The irq thread decrements the counter when + * it returns from the handler or in the exit path and wakes + * up waiters which are stuck in synchronize_irq() when the + * active count becomes zero. synchronize_irq() is serialized + * against this code (hard irq handler) via IRQS_INPROGRESS + * like the finalize_oneshot() code. See comment above. + */ + atomic_inc(&desc->threads_active); + + wake_up_process(action->thread); +} + +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) +{ + irqreturn_t retval = IRQ_NONE; + unsigned int irq = desc->irq_data.irq; + struct irqaction *action; + + record_irq_time(desc); + + for_each_action_of_desc(desc, action) { + irqreturn_t res; + + /* + * If this IRQ would be threaded under force_irqthreads, mark it so. + */ + if (irq_settings_can_thread(desc) && + !(action->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))) + lockdep_hardirq_threaded(); + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, res); + + if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", + irq, action->handler)) + local_irq_disable(); + + switch (res) { + case IRQ_WAKE_THREAD: + /* + * Catch drivers which return WAKE_THREAD but + * did not set up a thread function + */ + if (unlikely(!action->thread_fn)) { + warn_no_thread(irq, action); + break; + } + + __irq_wake_thread(desc, action); + break; + + default: + break; + } + + retval |= res; + } + + return retval; +} + +irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) +{ + irqreturn_t retval; + + retval = __handle_irq_event_percpu(desc); + + add_interrupt_randomness(desc->irq_data.irq); + + if (!irq_settings_no_debug(desc)) + note_interrupt(desc, retval); + return retval; +} + +irqreturn_t handle_irq_event(struct irq_desc *desc) +{ + irqreturn_t ret; + + desc->istate &= ~IRQS_PENDING; + irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); + raw_spin_unlock(&desc->lock); + + ret = handle_irq_event_percpu(desc); + + raw_spin_lock(&desc->lock); + irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); + return ret; +} + +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return -EBUSY; + + handle_arch_irq = handle_irq; + return 0; +} + +/** + * generic_handle_arch_irq - root irq handler for architectures which do no + * entry accounting themselves + * @regs: Register file coming from the low-level handling code + */ +asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + irq_enter(); + old_regs = set_irq_regs(regs); + handle_arch_irq(regs); + set_irq_regs(old_regs); + irq_exit(); +} +#endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h new file mode 100644 index 0000000000..bcc7f21db9 --- /dev/null +++ b/kernel/irq/internals.h @@ -0,0 +1,521 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IRQ subsystem internal functions and variables: + * + * Do not ever include this file from anything else than + * kernel/irq/. Do not even think about using any information outside + * of this file for your non core code. + */ +#include <linux/irqdesc.h> +#include <linux/kernel_stat.h> +#include <linux/pm_runtime.h> +#include <linux/sched/clock.h> + +#ifdef CONFIG_SPARSE_IRQ +# define MAX_SPARSE_IRQS INT_MAX +#else +# define MAX_SPARSE_IRQS NR_IRQS +#endif + +#define istate core_internal_state__do_not_mess_with_it + +extern bool noirqdebug; + +extern struct irqaction chained_action; + +/* + * Bits used by threaded handlers: + * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run + * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed + * IRQTF_AFFINITY - irq thread is requested to adjust affinity + * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready + */ +enum { + IRQTF_RUNTHREAD, + IRQTF_WARNED, + IRQTF_AFFINITY, + IRQTF_FORCED_THREAD, + IRQTF_READY, +}; + +/* + * Bit masks for desc->core_internal_state__do_not_mess_with_it + * + * IRQS_AUTODETECT - autodetection in progress + * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt + * detection + * IRQS_POLL_INPROGRESS - polling in progress + * IRQS_ONESHOT - irq is not unmasked in primary handler + * IRQS_REPLAY - irq has been resent and will not be resent + * again until the handler has run and cleared + * this flag. + * IRQS_WAITING - irq is waiting + * IRQS_PENDING - irq needs to be resent and should be resent + * at the next available opportunity. + * IRQS_SUSPENDED - irq is suspended + * IRQS_NMI - irq line is used to deliver NMIs + * IRQS_SYSFS - descriptor has been added to sysfs + */ +enum { + IRQS_AUTODETECT = 0x00000001, + IRQS_SPURIOUS_DISABLED = 0x00000002, + IRQS_POLL_INPROGRESS = 0x00000008, + IRQS_ONESHOT = 0x00000020, + IRQS_REPLAY = 0x00000040, + IRQS_WAITING = 0x00000080, + IRQS_PENDING = 0x00000200, + IRQS_SUSPENDED = 0x00000800, + IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, + IRQS_SYSFS = 0x00004000, +}; + +#include "debug.h" +#include "settings.h" + +extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); +extern void __disable_irq(struct irq_desc *desc); +extern void __enable_irq(struct irq_desc *desc); + +#define IRQ_RESEND true +#define IRQ_NORESEND false + +#define IRQ_START_FORCE true +#define IRQ_START_COND false + +extern int irq_activate(struct irq_desc *desc); +extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); +extern int irq_startup(struct irq_desc *desc, bool resend, bool force); + +extern void irq_shutdown(struct irq_desc *desc); +extern void irq_shutdown_and_deactivate(struct irq_desc *desc); +extern void irq_enable(struct irq_desc *desc); +extern void irq_disable(struct irq_desc *desc); +extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); +extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); +extern void mask_irq(struct irq_desc *desc); +extern void unmask_irq(struct irq_desc *desc); +extern void unmask_threaded_irq(struct irq_desc *desc); + +#ifdef CONFIG_SPARSE_IRQ +static inline void irq_mark_irq(unsigned int irq) { } +#else +extern void irq_mark_irq(unsigned int irq); +#endif + +extern int __irq_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state); + +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); +irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); +irqreturn_t handle_irq_event(struct irq_desc *desc); + +/* Resending of interrupts :*/ +int check_irq_resend(struct irq_desc *desc, bool inject); +void clear_irq_resend(struct irq_desc *desc); +void irq_resend_init(struct irq_desc *desc); +bool irq_wait_for_poll(struct irq_desc *desc); +void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); + +void wake_threads_waitq(struct irq_desc *desc); + +#ifdef CONFIG_PROC_FS +extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); +extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc); +extern void register_handler_proc(unsigned int irq, struct irqaction *action); +extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); +#else +static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } +static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } +static inline void register_handler_proc(unsigned int irq, + struct irqaction *action) { } +static inline void unregister_handler_proc(unsigned int irq, + struct irqaction *action) { } +#endif + +extern bool irq_can_set_affinity_usr(unsigned int irq); + +extern void irq_set_thread_affinity(struct irq_desc *desc); + +extern int irq_do_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force); + +#ifdef CONFIG_SMP +extern int irq_setup_affinity(struct irq_desc *desc); +#else +static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } +#endif + +/* Inline functions for support of irq chips on slow busses */ +static inline void chip_bus_lock(struct irq_desc *desc) +{ + if (unlikely(desc->irq_data.chip->irq_bus_lock)) + desc->irq_data.chip->irq_bus_lock(&desc->irq_data); +} + +static inline void chip_bus_sync_unlock(struct irq_desc *desc) +{ + if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) + desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); +} + +#define _IRQ_DESC_CHECK (1 << 0) +#define _IRQ_DESC_PERCPU (1 << 1) + +#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) +#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) + +#define for_each_action_of_desc(desc, act) \ + for (act = desc->action; act; act = act->next) + +struct irq_desc * +__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check); +void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); + +static inline struct irq_desc * +irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) +{ + return __irq_get_desc_lock(irq, flags, true, check); +} + +static inline void +irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) +{ + __irq_put_desc_unlock(desc, flags, true); +} + +static inline struct irq_desc * +irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) +{ + return __irq_get_desc_lock(irq, flags, false, check); +} + +static inline void +irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) +{ + __irq_put_desc_unlock(desc, flags, false); +} + +#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) + +static inline unsigned int irqd_get(struct irq_data *d) +{ + return __irqd_to_state(d); +} + +/* + * Manipulation functions for irq_data.state + */ +static inline void irqd_set_move_pending(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_SETAFFINITY_PENDING; +} + +static inline void irqd_clr_move_pending(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; +} + +static inline void irqd_set_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; +} + +static inline void irqd_clr_managed_shutdown(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; +} + +static inline void irqd_clear(struct irq_data *d, unsigned int mask) +{ + __irqd_to_state(d) &= ~mask; +} + +static inline void irqd_set(struct irq_data *d, unsigned int mask) +{ + __irqd_to_state(d) |= mask; +} + +static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) +{ + return __irqd_to_state(d) & mask; +} + +static inline void irq_state_set_disabled(struct irq_desc *desc) +{ + irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); +} + +static inline void irq_state_set_masked(struct irq_desc *desc) +{ + irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); +} + +#undef __irqd_to_state + +static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) +{ + __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} + +static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) +{ + __kstat_incr_irqs_this_cpu(desc); + desc->tot_count++; +} + +static inline int irq_desc_get_node(struct irq_desc *desc) +{ + return irq_common_data_get_node(&desc->irq_common_data); +} + +static inline int irq_desc_is_chained(struct irq_desc *desc) +{ + return (desc->action && desc->action == &chained_action); +} + +#ifdef CONFIG_PM_SLEEP +bool irq_pm_check_wakeup(struct irq_desc *desc); +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); +#else +static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } +static inline void +irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } +static inline void +irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } +#endif + +#ifdef CONFIG_IRQ_TIMINGS + +#define IRQ_TIMINGS_SHIFT 5 +#define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT) +#define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1) + +/** + * struct irq_timings - irq timings storing structure + * @values: a circular buffer of u64 encoded <timestamp,irq> values + * @count: the number of elements in the array + */ +struct irq_timings { + u64 values[IRQ_TIMINGS_SIZE]; + int count; +}; + +DECLARE_PER_CPU(struct irq_timings, irq_timings); + +extern void irq_timings_free(int irq); +extern int irq_timings_alloc(int irq); + +static inline void irq_remove_timings(struct irq_desc *desc) +{ + desc->istate &= ~IRQS_TIMINGS; + + irq_timings_free(irq_desc_get_irq(desc)); +} + +static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) +{ + int irq = irq_desc_get_irq(desc); + int ret; + + /* + * We don't need the measurement because the idle code already + * knows the next expiry event. + */ + if (act->flags & __IRQF_TIMER) + return; + + /* + * In case the timing allocation fails, we just want to warn, + * not fail, so letting the system boot anyway. + */ + ret = irq_timings_alloc(irq); + if (ret) { + pr_warn("Failed to allocate irq timing stats for irq%d (%d)", + irq, ret); + return; + } + + desc->istate |= IRQS_TIMINGS; +} + +extern void irq_timings_enable(void); +extern void irq_timings_disable(void); + +DECLARE_STATIC_KEY_FALSE(irq_timing_enabled); + +/* + * The interrupt number and the timestamp are encoded into a single + * u64 variable to optimize the size. + * 48 bit time stamp and 16 bit IRQ number is way sufficient. + * Who cares an IRQ after 78 hours of idle time? + */ +static inline u64 irq_timing_encode(u64 timestamp, int irq) +{ + return (timestamp << 16) | irq; +} + +static inline int irq_timing_decode(u64 value, u64 *timestamp) +{ + *timestamp = value >> 16; + return value & U16_MAX; +} + +static __always_inline void irq_timings_push(u64 ts, int irq) +{ + struct irq_timings *timings = this_cpu_ptr(&irq_timings); + + timings->values[timings->count & IRQ_TIMINGS_MASK] = + irq_timing_encode(ts, irq); + + timings->count++; +} + +/* + * The function record_irq_time is only called in one place in the + * interrupts handler. We want this function always inline so the code + * inside is embedded in the function and the static key branching + * code can act at the higher level. Without the explicit + * __always_inline we can end up with a function call and a small + * overhead in the hotpath for nothing. + */ +static __always_inline void record_irq_time(struct irq_desc *desc) +{ + if (!static_branch_likely(&irq_timing_enabled)) + return; + + if (desc->istate & IRQS_TIMINGS) + irq_timings_push(local_clock(), irq_desc_get_irq(desc)); +} +#else +static inline void irq_remove_timings(struct irq_desc *desc) {} +static inline void irq_setup_timings(struct irq_desc *desc, + struct irqaction *act) {}; +static inline void record_irq_time(struct irq_desc *desc) {} +#endif /* CONFIG_IRQ_TIMINGS */ + + +#ifdef CONFIG_GENERIC_IRQ_CHIP +void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler); +#else +static inline void +irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, + int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) { } +#endif /* CONFIG_GENERIC_IRQ_CHIP */ + +#ifdef CONFIG_GENERIC_PENDING_IRQ +static inline bool irq_can_move_pcntxt(struct irq_data *data) +{ + return irqd_can_move_in_process_context(data); +} +static inline bool irq_move_pending(struct irq_data *data) +{ + return irqd_is_setaffinity_pending(data); +} +static inline void +irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) +{ + cpumask_copy(desc->pending_mask, mask); +} +static inline void +irq_get_pending(struct cpumask *mask, struct irq_desc *desc) +{ + cpumask_copy(mask, desc->pending_mask); +} +static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) +{ + return desc->pending_mask; +} +static inline bool handle_enforce_irqctx(struct irq_data *data) +{ + return irqd_is_handle_enforce_irqctx(data); +} +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); +#else /* CONFIG_GENERIC_PENDING_IRQ */ +static inline bool irq_can_move_pcntxt(struct irq_data *data) +{ + return true; +} +static inline bool irq_move_pending(struct irq_data *data) +{ + return false; +} +static inline void +irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) +{ +} +static inline void +irq_get_pending(struct cpumask *mask, struct irq_desc *desc) +{ +} +static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) +{ + return NULL; +} +static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) +{ + return false; +} +static inline bool handle_enforce_irqctx(struct irq_data *data) +{ + return false; +} +#endif /* !CONFIG_GENERIC_PENDING_IRQ */ + +#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) +static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) +{ + irqd_set_activated(data); + return 0; +} +static inline void irq_domain_deactivate_irq(struct irq_data *data) +{ + irqd_clr_activated(data); +} +#endif + +static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd) +{ +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irqd->parent_data; +#else + return NULL; +#endif +} + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +#include <linux/debugfs.h> + +void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); +static inline void irq_remove_debugfs_entry(struct irq_desc *desc) +{ + debugfs_remove(desc->debugfs_file); + kfree(desc->dev_name); +} +void irq_debugfs_copy_devname(int irq, struct device *dev); +# ifdef CONFIG_IRQ_DOMAIN +void irq_domain_debugfs_init(struct dentry *root); +# else +static inline void irq_domain_debugfs_init(struct dentry *root) +{ +} +# endif +#else /* CONFIG_GENERIC_IRQ_DEBUGFS */ +static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) +{ +} +static inline void irq_remove_debugfs_entry(struct irq_desc *d) +{ +} +static inline void irq_debugfs_copy_devname(int irq, struct device *dev) +{ +} +#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */ diff --git a/kernel/irq/ipi-mux.c b/kernel/irq/ipi-mux.c new file mode 100644 index 0000000000..fa4fc18c61 --- /dev/null +++ b/kernel/irq/ipi-mux.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Multiplex several virtual IPIs over a single HW IPI. + * + * Copyright The Asahi Linux Contributors + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#define pr_fmt(fmt) "ipi-mux: " fmt +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/smp.h> + +struct ipi_mux_cpu { + atomic_t enable; + atomic_t bits; +}; + +static struct ipi_mux_cpu __percpu *ipi_mux_pcpu; +static struct irq_domain *ipi_mux_domain; +static void (*ipi_mux_send)(unsigned int cpu); + +static void ipi_mux_mask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + + atomic_andnot(BIT(irqd_to_hwirq(d)), &icpu->enable); +} + +static void ipi_mux_unmask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + + atomic_or(ibit, &icpu->enable); + + /* + * The atomic_or() above must complete before the atomic_read() + * below to avoid racing ipi_mux_send_mask(). + */ + smp_mb__after_atomic(); + + /* If a pending IPI was unmasked, raise a parent IPI immediately. */ + if (atomic_read(&icpu->bits) & ibit) + ipi_mux_send(smp_processor_id()); +} + +static void ipi_mux_send_mask(struct irq_data *d, const struct cpumask *mask) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + unsigned long pending; + int cpu; + + for_each_cpu(cpu, mask) { + icpu = per_cpu_ptr(ipi_mux_pcpu, cpu); + + /* + * This sequence is the mirror of the one in ipi_mux_unmask(); + * see the comment there. Additionally, release semantics + * ensure that the vIPI flag set is ordered after any shared + * memory accesses that precede it. This therefore also pairs + * with the atomic_fetch_andnot in ipi_mux_process(). + */ + pending = atomic_fetch_or_release(ibit, &icpu->bits); + + /* + * The atomic_fetch_or_release() above must complete + * before the atomic_read() below to avoid racing with + * ipi_mux_unmask(). + */ + smp_mb__after_atomic(); + + /* + * The flag writes must complete before the physical IPI is + * issued to another CPU. This is implied by the control + * dependency on the result of atomic_read() below, which is + * itself already ordered after the vIPI flag write. + */ + if (!(pending & ibit) && (atomic_read(&icpu->enable) & ibit)) + ipi_mux_send(cpu); + } +} + +static const struct irq_chip ipi_mux_chip = { + .name = "IPI Mux", + .irq_mask = ipi_mux_mask, + .irq_unmask = ipi_mux_unmask, + .ipi_send_mask = ipi_mux_send_mask, +}; + +static int ipi_mux_domain_alloc(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_set_percpu_devid(virq + i); + irq_domain_set_info(d, virq + i, i, &ipi_mux_chip, NULL, + handle_percpu_devid_irq, NULL, NULL); + } + + return 0; +} + +static const struct irq_domain_ops ipi_mux_domain_ops = { + .alloc = ipi_mux_domain_alloc, + .free = irq_domain_free_irqs_top, +}; + +/** + * ipi_mux_process - Process multiplexed virtual IPIs + */ +void ipi_mux_process(void) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + irq_hw_number_t hwirq; + unsigned long ipis; + unsigned int en; + + /* + * Reading enable mask does not need to be ordered as long as + * this function is called from interrupt handler because only + * the CPU itself can change it's own enable mask. + */ + en = atomic_read(&icpu->enable); + + /* + * Clear the IPIs we are about to handle. This pairs with the + * atomic_fetch_or_release() in ipi_mux_send_mask(). + */ + ipis = atomic_fetch_andnot(en, &icpu->bits) & en; + + for_each_set_bit(hwirq, &ipis, BITS_PER_TYPE(int)) + generic_handle_domain_irq(ipi_mux_domain, hwirq); +} + +/** + * ipi_mux_create - Create virtual IPIs multiplexed on top of a single + * parent IPI. + * @nr_ipi: number of virtual IPIs to create. This should + * be <= BITS_PER_TYPE(int) + * @mux_send: callback to trigger parent IPI for a particular CPU + * + * Returns first virq of the newly created virtual IPIs upon success + * or <=0 upon failure + */ +int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu)) +{ + struct fwnode_handle *fwnode; + struct irq_domain *domain; + int rc; + + if (ipi_mux_domain) + return -EEXIST; + + if (BITS_PER_TYPE(int) < nr_ipi || !mux_send) + return -EINVAL; + + ipi_mux_pcpu = alloc_percpu(typeof(*ipi_mux_pcpu)); + if (!ipi_mux_pcpu) + return -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("IPI-Mux"); + if (!fwnode) { + pr_err("unable to create IPI Mux fwnode\n"); + rc = -ENOMEM; + goto fail_free_cpu; + } + + domain = irq_domain_create_linear(fwnode, nr_ipi, + &ipi_mux_domain_ops, NULL); + if (!domain) { + pr_err("unable to add IPI Mux domain\n"); + rc = -ENOMEM; + goto fail_free_fwnode; + } + + domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE; + irq_domain_update_bus_token(domain, DOMAIN_BUS_IPI); + + rc = irq_domain_alloc_irqs(domain, nr_ipi, NUMA_NO_NODE, NULL); + if (rc <= 0) { + pr_err("unable to alloc IRQs from IPI Mux domain\n"); + goto fail_free_domain; + } + + ipi_mux_domain = domain; + ipi_mux_send = mux_send; + + return rc; + +fail_free_domain: + irq_domain_remove(domain); +fail_free_fwnode: + irq_domain_free_fwnode(fwnode); +fail_free_cpu: + free_percpu(ipi_mux_pcpu); + return rc; +} diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c new file mode 100644 index 0000000000..961d4af76a --- /dev/null +++ b/kernel/irq/ipi.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015 Imagination Technologies Ltd + * Author: Qais Yousef <qais.yousef@imgtec.com> + * + * This file contains driver APIs to the IPI subsystem. + */ + +#define pr_fmt(fmt) "genirq/ipi: " fmt + +#include <linux/irqdomain.h> +#include <linux/irq.h> + +/** + * irq_reserve_ipi() - Setup an IPI to destination cpumask + * @domain: IPI domain + * @dest: cpumask of CPUs which can receive the IPI + * + * Allocate a virq that can be used to send IPI to any CPU in dest mask. + * + * Return: Linux IRQ number on success or error code on failure + */ +int irq_reserve_ipi(struct irq_domain *domain, + const struct cpumask *dest) +{ + unsigned int nr_irqs, offset; + struct irq_data *data; + int virq, i; + + if (!domain ||!irq_domain_is_ipi(domain)) { + pr_warn("Reservation on a non IPI domain\n"); + return -EINVAL; + } + + if (!cpumask_subset(dest, cpu_possible_mask)) { + pr_warn("Reservation is not in possible_cpu_mask\n"); + return -EINVAL; + } + + nr_irqs = cpumask_weight(dest); + if (!nr_irqs) { + pr_warn("Reservation for empty destination mask\n"); + return -EINVAL; + } + + if (irq_domain_is_ipi_single(domain)) { + /* + * If the underlying implementation uses a single HW irq on + * all cpus then we only need a single Linux irq number for + * it. We have no restrictions vs. the destination mask. The + * underlying implementation can deal with holes nicely. + */ + nr_irqs = 1; + offset = 0; + } else { + unsigned int next; + + /* + * The IPI requires a separate HW irq on each CPU. We require + * that the destination mask is consecutive. If an + * implementation needs to support holes, it can reserve + * several IPI ranges. + */ + offset = cpumask_first(dest); + /* + * Find a hole and if found look for another set bit after the + * hole. For now we don't support this scenario. + */ + next = cpumask_next_zero(offset, dest); + if (next < nr_cpu_ids) + next = cpumask_next(next, dest); + if (next < nr_cpu_ids) { + pr_warn("Destination mask has holes\n"); + return -EINVAL; + } + } + + virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL); + if (virq <= 0) { + pr_warn("Can't reserve IPI, failed to alloc descs\n"); + return -ENOMEM; + } + + virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE, + (void *) dest, true, NULL); + + if (virq <= 0) { + pr_warn("Can't reserve IPI, failed to alloc hw irqs\n"); + goto free_descs; + } + + for (i = 0; i < nr_irqs; i++) { + data = irq_get_irq_data(virq + i); + cpumask_copy(data->common->affinity, dest); + data->common->ipi_offset = offset; + irq_set_status_flags(virq + i, IRQ_NO_BALANCING); + } + return virq; + +free_descs: + irq_free_descs(virq, nr_irqs); + return -EBUSY; +} + +/** + * irq_destroy_ipi() - unreserve an IPI that was previously allocated + * @irq: Linux IRQ number to be destroyed + * @dest: cpumask of CPUs which should have the IPI removed + * + * The IPIs allocated with irq_reserve_ipi() are returned to the system + * destroying all virqs associated with them. + * + * Return: %0 on success or error code on failure. + */ +int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) +{ + struct irq_data *data = irq_get_irq_data(irq); + const struct cpumask *ipimask; + struct irq_domain *domain; + unsigned int nr_irqs; + + if (!irq || !data) + return -EINVAL; + + domain = data->domain; + if (WARN_ON(domain == NULL)) + return -EINVAL; + + if (!irq_domain_is_ipi(domain)) { + pr_warn("Trying to destroy a non IPI domain!\n"); + return -EINVAL; + } + + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || WARN_ON(!cpumask_subset(dest, ipimask))) + /* + * Must be destroying a subset of CPUs to which this IPI + * was set up to target + */ + return -EINVAL; + + if (irq_domain_is_ipi_per_cpu(domain)) { + irq = irq + cpumask_first(dest) - data->common->ipi_offset; + nr_irqs = cpumask_weight(dest); + } else { + nr_irqs = 1; + } + + irq_domain_free_irqs(irq, nr_irqs); + return 0; +} + +/** + * ipi_get_hwirq - Get the hwirq associated with an IPI to a CPU + * @irq: Linux IRQ number + * @cpu: the target CPU + * + * When dealing with coprocessors IPI, we need to inform the coprocessor of + * the hwirq it needs to use to receive and send IPIs. + * + * Return: hwirq value on success or INVALID_HWIRQ on failure. + */ +irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) +{ + struct irq_data *data = irq_get_irq_data(irq); + const struct cpumask *ipimask; + + if (!data || cpu >= nr_cpu_ids) + return INVALID_HWIRQ; + + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || !cpumask_test_cpu(cpu, ipimask)) + return INVALID_HWIRQ; + + /* + * Get the real hardware irq number if the underlying implementation + * uses a separate irq per cpu. If the underlying implementation uses + * a single hardware irq for all cpus then the IPI send mechanism + * needs to take care of the cpu destinations. + */ + if (irq_domain_is_ipi_per_cpu(data->domain)) + data = irq_get_irq_data(irq + cpu - data->common->ipi_offset); + + return data ? irqd_to_hwirq(data) : INVALID_HWIRQ; +} +EXPORT_SYMBOL_GPL(ipi_get_hwirq); + +static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, + const struct cpumask *dest, unsigned int cpu) +{ + const struct cpumask *ipimask; + + if (!chip || !data) + return -EINVAL; + + if (!chip->ipi_send_single && !chip->ipi_send_mask) + return -EINVAL; + + if (cpu >= nr_cpu_ids) + return -EINVAL; + + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask) + return -EINVAL; + + if (dest) { + if (!cpumask_subset(dest, ipimask)) + return -EINVAL; + } else { + if (!cpumask_test_cpu(cpu, ipimask)) + return -EINVAL; + } + return 0; +} + +/** + * __ipi_send_single - send an IPI to a target Linux SMP CPU + * @desc: pointer to irq_desc of the IRQ + * @cpu: destination CPU, must in the destination mask passed to + * irq_reserve_ipi() + * + * This function is for architecture or core code to speed up IPI sending. Not + * usable from driver code. + * + * Return: %0 on success or negative error number on failure. + */ +int __ipi_send_single(struct irq_desc *desc, unsigned int cpu) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); + +#ifdef DEBUG + /* + * Minimise the overhead by omitting the checks for Linux SMP IPIs. + * Since the callers should be arch or core code which is generally + * trusted, only check for errors when debugging. + */ + if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu))) + return -EINVAL; +#endif + if (!chip->ipi_send_single) { + chip->ipi_send_mask(data, cpumask_of(cpu)); + return 0; + } + + /* FIXME: Store this information in irqdata flags */ + if (irq_domain_is_ipi_per_cpu(data->domain) && + cpu != data->common->ipi_offset) { + /* use the correct data for that cpu */ + unsigned irq = data->irq + cpu - data->common->ipi_offset; + + data = irq_get_irq_data(irq); + } + chip->ipi_send_single(data, cpu); + return 0; +} + +/** + * __ipi_send_mask - send an IPI to target Linux SMP CPU(s) + * @desc: pointer to irq_desc of the IRQ + * @dest: dest CPU(s), must be a subset of the mask passed to + * irq_reserve_ipi() + * + * This function is for architecture or core code to speed up IPI sending. Not + * usable from driver code. + * + * Return: %0 on success or negative error number on failure. + */ +int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); + unsigned int cpu; + +#ifdef DEBUG + /* + * Minimise the overhead by omitting the checks for Linux SMP IPIs. + * Since the callers should be arch or core code which is generally + * trusted, only check for errors when debugging. + */ + if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0))) + return -EINVAL; +#endif + if (chip->ipi_send_mask) { + chip->ipi_send_mask(data, dest); + return 0; + } + + if (irq_domain_is_ipi_per_cpu(data->domain)) { + unsigned int base = data->irq; + + for_each_cpu(cpu, dest) { + unsigned irq = base + cpu - data->common->ipi_offset; + + data = irq_get_irq_data(irq); + chip->ipi_send_single(data, cpu); + } + } else { + for_each_cpu(cpu, dest) + chip->ipi_send_single(data, cpu); + } + return 0; +} + +/** + * ipi_send_single - Send an IPI to a single CPU + * @virq: Linux IRQ number from irq_reserve_ipi() + * @cpu: destination CPU, must in the destination mask passed to + * irq_reserve_ipi() + * + * Return: %0 on success or negative error number on failure. + */ +int ipi_send_single(unsigned int virq, unsigned int cpu) +{ + struct irq_desc *desc = irq_to_desc(virq); + struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL; + struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL; + + if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu))) + return -EINVAL; + + return __ipi_send_single(desc, cpu); +} +EXPORT_SYMBOL_GPL(ipi_send_single); + +/** + * ipi_send_mask - Send an IPI to target CPU(s) + * @virq: Linux IRQ number from irq_reserve_ipi() + * @dest: dest CPU(s), must be a subset of the mask passed to + * irq_reserve_ipi() + * + * Return: %0 on success or negative error number on failure. + */ +int ipi_send_mask(unsigned int virq, const struct cpumask *dest) +{ + struct irq_desc *desc = irq_to_desc(virq); + struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL; + struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL; + + if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0))) + return -EINVAL; + + return __ipi_send_mask(desc, dest); +} +EXPORT_SYMBOL_GPL(ipi_send_mask); diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c new file mode 100644 index 0000000000..dd76323ea3 --- /dev/null +++ b/kernel/irq/irq_sim.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl> + * Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com> + */ + +#include <linux/irq.h> +#include <linux/irq_sim.h> +#include <linux/irq_work.h> +#include <linux/interrupt.h> +#include <linux/slab.h> + +struct irq_sim_work_ctx { + struct irq_work work; + int irq_base; + unsigned int irq_count; + unsigned long *pending; + struct irq_domain *domain; +}; + +struct irq_sim_irq_ctx { + int irqnum; + bool enabled; + struct irq_sim_work_ctx *work_ctx; +}; + +static void irq_sim_irqmask(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + + irq_ctx->enabled = false; +} + +static void irq_sim_irqunmask(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + + irq_ctx->enabled = true; +} + +static int irq_sim_set_type(struct irq_data *data, unsigned int type) +{ + /* We only support rising and falling edge trigger types. */ + if (type & ~IRQ_TYPE_EDGE_BOTH) + return -EINVAL; + + irqd_set_trigger_type(data, type); + + return 0; +} + +static int irq_sim_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, bool *state) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + switch (which) { + case IRQCHIP_STATE_PENDING: + if (irq_ctx->enabled) + *state = test_bit(hwirq, irq_ctx->work_ctx->pending); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int irq_sim_set_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, bool state) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + switch (which) { + case IRQCHIP_STATE_PENDING: + if (irq_ctx->enabled) { + assign_bit(hwirq, irq_ctx->work_ctx->pending, state); + if (state) + irq_work_queue(&irq_ctx->work_ctx->work); + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static struct irq_chip irq_sim_irqchip = { + .name = "irq_sim", + .irq_mask = irq_sim_irqmask, + .irq_unmask = irq_sim_irqunmask, + .irq_set_type = irq_sim_set_type, + .irq_get_irqchip_state = irq_sim_get_irqchip_state, + .irq_set_irqchip_state = irq_sim_set_irqchip_state, +}; + +static void irq_sim_handle_irq(struct irq_work *work) +{ + struct irq_sim_work_ctx *work_ctx; + unsigned int offset = 0; + int irqnum; + + work_ctx = container_of(work, struct irq_sim_work_ctx, work); + + while (!bitmap_empty(work_ctx->pending, work_ctx->irq_count)) { + offset = find_next_bit(work_ctx->pending, + work_ctx->irq_count, offset); + clear_bit(offset, work_ctx->pending); + irqnum = irq_find_mapping(work_ctx->domain, offset); + handle_simple_irq(irq_to_desc(irqnum)); + } +} + +static int irq_sim_domain_map(struct irq_domain *domain, + unsigned int virq, irq_hw_number_t hw) +{ + struct irq_sim_work_ctx *work_ctx = domain->host_data; + struct irq_sim_irq_ctx *irq_ctx; + + irq_ctx = kzalloc(sizeof(*irq_ctx), GFP_KERNEL); + if (!irq_ctx) + return -ENOMEM; + + irq_set_chip(virq, &irq_sim_irqchip); + irq_set_chip_data(virq, irq_ctx); + irq_set_handler(virq, handle_simple_irq); + irq_modify_status(virq, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE); + irq_ctx->work_ctx = work_ctx; + + return 0; +} + +static void irq_sim_domain_unmap(struct irq_domain *domain, unsigned int virq) +{ + struct irq_sim_irq_ctx *irq_ctx; + struct irq_data *irqd; + + irqd = irq_domain_get_irq_data(domain, virq); + irq_ctx = irq_data_get_irq_chip_data(irqd); + + irq_set_handler(virq, NULL); + irq_domain_reset_irq_data(irqd); + kfree(irq_ctx); +} + +static const struct irq_domain_ops irq_sim_domain_ops = { + .map = irq_sim_domain_map, + .unmap = irq_sim_domain_unmap, +}; + +/** + * irq_domain_create_sim - Create a new interrupt simulator irq_domain and + * allocate a range of dummy interrupts. + * + * @fwnode: struct fwnode_handle to be associated with this domain. + * @num_irqs: Number of interrupts to allocate. + * + * On success: return a new irq_domain object. + * On failure: a negative errno wrapped with ERR_PTR(). + */ +struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode, + unsigned int num_irqs) +{ + struct irq_sim_work_ctx *work_ctx; + + work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL); + if (!work_ctx) + goto err_out; + + work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL); + if (!work_ctx->pending) + goto err_free_work_ctx; + + work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs, + &irq_sim_domain_ops, + work_ctx); + if (!work_ctx->domain) + goto err_free_bitmap; + + work_ctx->irq_count = num_irqs; + work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq); + + return work_ctx->domain; + +err_free_bitmap: + bitmap_free(work_ctx->pending); +err_free_work_ctx: + kfree(work_ctx); +err_out: + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(irq_domain_create_sim); + +/** + * irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free + * the interrupt descriptors and allocated memory. + * + * @domain: The interrupt simulator domain to tear down. + */ +void irq_domain_remove_sim(struct irq_domain *domain) +{ + struct irq_sim_work_ctx *work_ctx = domain->host_data; + + irq_work_sync(&work_ctx->work); + bitmap_free(work_ctx->pending); + kfree(work_ctx); + + irq_domain_remove(domain); +} +EXPORT_SYMBOL_GPL(irq_domain_remove_sim); + +static void devm_irq_domain_remove_sim(void *data) +{ + struct irq_domain *domain = data; + + irq_domain_remove_sim(domain); +} + +/** + * devm_irq_domain_create_sim - Create a new interrupt simulator for + * a managed device. + * + * @dev: Device to initialize the simulator object for. + * @fwnode: struct fwnode_handle to be associated with this domain. + * @num_irqs: Number of interrupts to allocate + * + * On success: return a new irq_domain object. + * On failure: a negative errno wrapped with ERR_PTR(). + */ +struct irq_domain *devm_irq_domain_create_sim(struct device *dev, + struct fwnode_handle *fwnode, + unsigned int num_irqs) +{ + struct irq_domain *domain; + int ret; + + domain = irq_domain_create_sim(fwnode, num_irqs); + if (IS_ERR(domain)) + return domain; + + ret = devm_add_action_or_reset(dev, devm_irq_domain_remove_sim, domain); + if (ret) + return ERR_PTR(ret); + + return domain; +} +EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c new file mode 100644 index 0000000000..371eb1711d --- /dev/null +++ b/kernel/irq/irqdesc.c @@ -0,0 +1,1014 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the interrupt descriptor management code. Detailed + * information is available in Documentation/core-api/genericirq.rst + * + */ +#include <linux/irq.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/maple_tree.h> +#include <linux/irqdomain.h> +#include <linux/sysfs.h> + +#include "internals.h" + +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + +#if defined(CONFIG_SMP) +static int __init irq_affinity_setup(char *str) +{ + alloc_bootmem_cpumask_var(&irq_default_affinity); + cpulist_parse(str, irq_default_affinity); + /* + * Set at least the boot cpu. We don't want to end up with + * bugreports caused by random commandline masks + */ + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); + return 1; +} +__setup("irqaffinity=", irq_affinity_setup); + +static void __init init_irq_default_affinity(void) +{ + if (!cpumask_available(irq_default_affinity)) + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); + if (cpumask_empty(irq_default_affinity)) + cpumask_setall(irq_default_affinity); +} +#else +static void __init init_irq_default_affinity(void) +{ +} +#endif + +#ifdef CONFIG_SMP +static int alloc_masks(struct irq_desc *desc, int node) +{ + if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, + GFP_KERNEL, node)) + return -ENOMEM; + +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity, + GFP_KERNEL, node)) { + free_cpumask_var(desc->irq_common_data.affinity); + return -ENOMEM; + } +#endif + +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) { +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif + free_cpumask_var(desc->irq_common_data.affinity); + return -ENOMEM; + } +#endif + return 0; +} + +static void desc_smp_init(struct irq_desc *desc, int node, + const struct cpumask *affinity) +{ + if (!affinity) + affinity = irq_default_affinity; + cpumask_copy(desc->irq_common_data.affinity, affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +#ifdef CONFIG_NUMA + desc->irq_common_data.node = node; +#endif +} + +#else +static inline int +alloc_masks(struct irq_desc *desc, int node) { return 0; } +static inline void +desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } +#endif + +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + const struct cpumask *affinity, struct module *owner) +{ + int cpu; + + desc->irq_common_data.handler_data = NULL; + desc->irq_common_data.msi_desc = NULL; + + desc->irq_data.common = &desc->irq_common_data; + desc->irq_data.irq = irq; + desc->irq_data.chip = &no_irq_chip; + desc->irq_data.chip_data = NULL; + irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); + irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); + irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); + desc->handle_irq = handle_bad_irq; + desc->depth = 1; + desc->irq_count = 0; + desc->irqs_unhandled = 0; + desc->tot_count = 0; + desc->name = NULL; + desc->owner = owner; + for_each_possible_cpu(cpu) + *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; + desc_smp_init(desc, node, affinity); +} + +int nr_irqs = NR_IRQS; +EXPORT_SYMBOL_GPL(nr_irqs); + +static DEFINE_MUTEX(sparse_irq_lock); +static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs, + MT_FLAGS_ALLOC_RANGE | + MT_FLAGS_LOCK_EXTERN | + MT_FLAGS_USE_RCU, + sparse_irq_lock); + +static int irq_find_free_area(unsigned int from, unsigned int cnt) +{ + MA_STATE(mas, &sparse_irqs, 0, 0); + + if (mas_empty_area(&mas, from, MAX_SPARSE_IRQS, cnt)) + return -ENOSPC; + return mas.index; +} + +static unsigned int irq_find_at_or_after(unsigned int offset) +{ + unsigned long index = offset; + struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs); + + return desc ? irq_desc_get_irq(desc) : nr_irqs; +} + +static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + WARN_ON(mas_store_gfp(&mas, desc, GFP_KERNEL) != 0); +} + +static void delete_irq_desc(unsigned int irq) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + mas_erase(&mas); +} + +#ifdef CONFIG_SPARSE_IRQ + +static void irq_kobj_release(struct kobject *kobj); + +#ifdef CONFIG_SYSFS +static struct kobject *irq_kobj_base; + +#define IRQ_ATTR_RO(_name) \ +static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +static ssize_t per_cpu_count_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + char *p = ""; + int cpu; + + for_each_possible_cpu(cpu) { + unsigned int c = irq_desc_kstat_cpu(desc, cpu); + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); + p = ","; + } + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + return ret; +} +IRQ_ATTR_RO(per_cpu_count); + +static ssize_t chip_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->irq_data.chip && desc->irq_data.chip->name) { + ret = scnprintf(buf, PAGE_SIZE, "%s\n", + desc->irq_data.chip->name); + } + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(chip_name); + +static ssize_t hwirq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->irq_data.domain) + ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq); + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(hwirq); + +static ssize_t type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + ret = sprintf(buf, "%s\n", + irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); + raw_spin_unlock_irq(&desc->lock); + + return ret; + +} +IRQ_ATTR_RO(type); + +static ssize_t wakeup_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + ret = sprintf(buf, "%s\n", + irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled"); + raw_spin_unlock_irq(&desc->lock); + + return ret; + +} +IRQ_ATTR_RO(wakeup); + +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->name) + ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name); + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(name); + +static ssize_t actions_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + struct irqaction *action; + ssize_t ret = 0; + char *p = ""; + + raw_spin_lock_irq(&desc->lock); + for_each_action_of_desc(desc, action) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", + p, action->name); + p = ","; + } + raw_spin_unlock_irq(&desc->lock); + + if (ret) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + + return ret; +} +IRQ_ATTR_RO(actions); + +static struct attribute *irq_attrs[] = { + &per_cpu_count_attr.attr, + &chip_name_attr.attr, + &hwirq_attr.attr, + &type_attr.attr, + &wakeup_attr.attr, + &name_attr.attr, + &actions_attr.attr, + NULL +}; +ATTRIBUTE_GROUPS(irq); + +static const struct kobj_type irq_kobj_type = { + .release = irq_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = irq_groups, +}; + +static void irq_sysfs_add(int irq, struct irq_desc *desc) +{ + if (irq_kobj_base) { + /* + * Continue even in case of failure as this is nothing + * crucial and failures in the late irq_sysfs_init() + * cannot be rolled back. + */ + if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) + pr_warn("Failed to add kobject for irq %d\n", irq); + else + desc->istate |= IRQS_SYSFS; + } +} + +static void irq_sysfs_del(struct irq_desc *desc) +{ + /* + * Only invoke kobject_del() when kobject_add() was successfully + * invoked for the descriptor. This covers both early boot, where + * sysfs is not initialized yet, and the case of a failed + * kobject_add() invocation. + */ + if (desc->istate & IRQS_SYSFS) + kobject_del(&desc->kobj); +} + +static int __init irq_sysfs_init(void) +{ + struct irq_desc *desc; + int irq; + + /* Prevent concurrent irq alloc/free */ + irq_lock_sparse(); + + irq_kobj_base = kobject_create_and_add("irq", kernel_kobj); + if (!irq_kobj_base) { + irq_unlock_sparse(); + return -ENOMEM; + } + + /* Add the already allocated interrupts */ + for_each_irq_desc(irq, desc) + irq_sysfs_add(irq, desc); + irq_unlock_sparse(); + + return 0; +} +postcore_initcall(irq_sysfs_init); + +#else /* !CONFIG_SYSFS */ + +static const struct kobj_type irq_kobj_type = { + .release = irq_kobj_release, +}; + +static void irq_sysfs_add(int irq, struct irq_desc *desc) {} +static void irq_sysfs_del(struct irq_desc *desc) {} + +#endif /* CONFIG_SYSFS */ + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return mtree_load(&sparse_irqs, irq); +} +#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE +EXPORT_SYMBOL_GPL(irq_to_desc); +#endif + +#ifdef CONFIG_SMP +static void free_masks(struct irq_desc *desc) +{ +#ifdef CONFIG_GENERIC_PENDING_IRQ + free_cpumask_var(desc->pending_mask); +#endif + free_cpumask_var(desc->irq_common_data.affinity); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif +} +#else +static inline void free_masks(struct irq_desc *desc) { } +#endif + +void irq_lock_sparse(void) +{ + mutex_lock(&sparse_irq_lock); +} + +void irq_unlock_sparse(void) +{ + mutex_unlock(&sparse_irq_lock); +} + +static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, + const struct cpumask *affinity, + struct module *owner) +{ + struct irq_desc *desc; + + desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); + if (!desc) + return NULL; + /* allocate based on nr_cpu_ids */ + desc->kstat_irqs = alloc_percpu(unsigned int); + if (!desc->kstat_irqs) + goto err_desc; + + if (alloc_masks(desc, node)) + goto err_kstat; + + raw_spin_lock_init(&desc->lock); + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + mutex_init(&desc->request_mutex); + init_rcu_head(&desc->rcu); + init_waitqueue_head(&desc->wait_for_threads); + + desc_set_defaults(irq, desc, node, affinity, owner); + irqd_set(&desc->irq_data, flags); + kobject_init(&desc->kobj, &irq_kobj_type); + irq_resend_init(desc); + + return desc; + +err_kstat: + free_percpu(desc->kstat_irqs); +err_desc: + kfree(desc); + return NULL; +} + +static void irq_kobj_release(struct kobject *kobj) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + + free_masks(desc); + free_percpu(desc->kstat_irqs); + kfree(desc); +} + +static void delayed_free_desc(struct rcu_head *rhp) +{ + struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu); + + kobject_put(&desc->kobj); +} + +static void free_desc(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + irq_remove_debugfs_entry(desc); + unregister_irq_proc(irq, desc); + + /* + * sparse_irq_lock protects also show_interrupts() and + * kstat_irq_usr(). Once we deleted the descriptor from the + * sparse tree we can free it. Access in proc will fail to + * lookup the descriptor. + * + * The sysfs entry must be serialized against a concurrent + * irq_sysfs_init() as well. + */ + irq_sysfs_del(desc); + delete_irq_desc(irq); + + /* + * We free the descriptor, masks and stat fields via RCU. That + * allows demultiplex interrupts to do rcu based management of + * the child interrupts. + * This also allows us to use rcu in kstat_irqs_usr(). + */ + call_rcu(&desc->rcu, delayed_free_desc); +} + +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + const struct irq_affinity_desc *affinity, + struct module *owner) +{ + struct irq_desc *desc; + int i; + + /* Validate affinity mask(s) */ + if (affinity) { + for (i = 0; i < cnt; i++) { + if (cpumask_empty(&affinity[i].mask)) + return -EINVAL; + } + } + + for (i = 0; i < cnt; i++) { + const struct cpumask *mask = NULL; + unsigned int flags = 0; + + if (affinity) { + if (affinity->is_managed) { + flags = IRQD_AFFINITY_MANAGED | + IRQD_MANAGED_SHUTDOWN; + } + mask = &affinity->mask; + node = cpu_to_node(cpumask_first(mask)); + affinity++; + } + + desc = alloc_desc(start + i, node, flags, mask, owner); + if (!desc) + goto err; + irq_insert_desc(start + i, desc); + irq_sysfs_add(start + i, desc); + irq_add_debugfs_entry(start + i, desc); + } + return start; + +err: + for (i--; i >= 0; i--) + free_desc(start + i); + return -ENOMEM; +} + +static int irq_expand_nr_irqs(unsigned int nr) +{ + if (nr > MAX_SPARSE_IRQS) + return -ENOMEM; + nr_irqs = nr; + return 0; +} + +int __init early_irq_init(void) +{ + int i, initcnt, node = first_online_node; + struct irq_desc *desc; + + init_irq_default_affinity(); + + /* Let arch update nr_irqs and return the nr of preallocated irqs */ + initcnt = arch_probe_nr_irqs(); + printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n", + NR_IRQS, nr_irqs, initcnt); + + if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS)) + nr_irqs = MAX_SPARSE_IRQS; + + if (WARN_ON(initcnt > MAX_SPARSE_IRQS)) + initcnt = MAX_SPARSE_IRQS; + + if (initcnt > nr_irqs) + nr_irqs = initcnt; + + for (i = 0; i < initcnt; i++) { + desc = alloc_desc(i, node, 0, NULL, NULL); + irq_insert_desc(i, desc); + } + return arch_early_irq_init(); +} + +#else /* !CONFIG_SPARSE_IRQ */ + +struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { + [0 ... NR_IRQS-1] = { + .handle_irq = handle_bad_irq, + .depth = 1, + .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), + } +}; + +int __init early_irq_init(void) +{ + int count, i, node = first_online_node; + struct irq_desc *desc; + + init_irq_default_affinity(); + + printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS); + + desc = irq_desc; + count = ARRAY_SIZE(irq_desc); + + for (i = 0; i < count; i++) { + desc[i].kstat_irqs = alloc_percpu(unsigned int); + alloc_masks(&desc[i], node); + raw_spin_lock_init(&desc[i].lock); + lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); + mutex_init(&desc[i].request_mutex); + init_waitqueue_head(&desc[i].wait_for_threads); + desc_set_defaults(i, &desc[i], node, NULL, NULL); + irq_resend_init(&desc[i]); + } + return arch_early_irq_init(); +} + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return (irq < NR_IRQS) ? irq_desc + irq : NULL; +} +EXPORT_SYMBOL(irq_to_desc); + +static void free_desc(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); + raw_spin_unlock_irqrestore(&desc->lock, flags); + delete_irq_desc(irq); +} + +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + const struct irq_affinity_desc *affinity, + struct module *owner) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + irq_insert_desc(start + i, desc); + } + return start; +} + +static int irq_expand_nr_irqs(unsigned int nr) +{ + return -ENOMEM; +} + +void irq_mark_irq(unsigned int irq) +{ + mutex_lock(&sparse_irq_lock); + irq_insert_desc(irq, irq_desc + irq); + mutex_unlock(&sparse_irq_lock); +} + +#ifdef CONFIG_GENERIC_IRQ_LEGACY +void irq_init_desc(unsigned int irq) +{ + free_desc(irq); +} +#endif + +#endif /* !CONFIG_SPARSE_IRQ */ + +int handle_irq_desc(struct irq_desc *desc) +{ + struct irq_data *data; + + if (!desc) + return -EINVAL; + + data = irq_desc_get_irq_data(desc); + if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data))) + return -EPERM; + + generic_handle_irq_desc(desc); + return 0; +} + +/** + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle + * + * Returns: 0 on success, or -EINVAL if conversion has failed + * + * This function must be called from an IRQ context with irq regs + * initialized. + */ +int generic_handle_irq(unsigned int irq) +{ + return handle_irq_desc(irq_to_desc(irq)); +} +EXPORT_SYMBOL_GPL(generic_handle_irq); + +/** + * generic_handle_irq_safe - Invoke the handler for a particular irq from any + * context. + * @irq: The irq number to handle + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. + */ +int generic_handle_irq_safe(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_to_desc(irq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); + +#ifdef CONFIG_IRQ_DOMAIN +/** + * generic_handle_domain_irq - Invoke the handler for a HW irq belonging + * to a domain. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, or -EINVAL if conversion has failed + * + * This function must be called from an IRQ context with irq regs + * initialized. + */ +int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) +{ + return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); +} +EXPORT_SYMBOL_GPL(generic_handle_domain_irq); + + /** + * generic_handle_irq_safe - Invoke the handler for a HW irq belonging + * to a domain from any context. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process + * context). If the interrupt is marked as 'enforce IRQ-context only' then + * the function must be invoked from hard interrupt context. + */ +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe); + +/** + * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging + * to a domain. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, or -EINVAL if conversion has failed + * + * This function must be called from an NMI context with irq regs + * initialized. + **/ +int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq) +{ + WARN_ON_ONCE(!in_nmi()); + return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); +} +#endif + +/* Dynamic interrupt handling */ + +/** + * irq_free_descs - free irq descriptors + * @from: Start of descriptor range + * @cnt: Number of consecutive irqs to free + */ +void irq_free_descs(unsigned int from, unsigned int cnt) +{ + int i; + + if (from >= nr_irqs || (from + cnt) > nr_irqs) + return; + + mutex_lock(&sparse_irq_lock); + for (i = 0; i < cnt; i++) + free_desc(from + i); + + mutex_unlock(&sparse_irq_lock); +} +EXPORT_SYMBOL_GPL(irq_free_descs); + +/** + * __irq_alloc_descs - allocate and initialize a range of irq descriptors + * @irq: Allocate for specific irq number if irq >= 0 + * @from: Start the search from this irq number + * @cnt: Number of consecutive irqs to allocate. + * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an affinity mask array of size @cnt which + * hints where the irq descriptors should be allocated and which + * default affinities to use + * + * Returns the first irq number or error code + */ +int __ref +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner, const struct irq_affinity_desc *affinity) +{ + int start, ret; + + if (!cnt) + return -EINVAL; + + if (irq >= 0) { + if (from > irq) + return -EINVAL; + from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); + } + + mutex_lock(&sparse_irq_lock); + + start = irq_find_free_area(from, cnt); + ret = -EEXIST; + if (irq >=0 && start != irq) + goto unlock; + + if (start + cnt > nr_irqs) { + ret = irq_expand_nr_irqs(start + cnt); + if (ret) + goto unlock; + } + ret = alloc_descs(start, cnt, node, affinity, owner); +unlock: + mutex_unlock(&sparse_irq_lock); + return ret; +} +EXPORT_SYMBOL_GPL(__irq_alloc_descs); + +/** + * irq_get_next_irq - get next allocated irq number + * @offset: where to start the search + * + * Returns next irq number after offset or nr_irqs if none is found. + */ +unsigned int irq_get_next_irq(unsigned int offset) +{ + return irq_find_at_or_after(offset); +} + +struct irq_desc * +__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + if (check & _IRQ_DESC_CHECK) { + if ((check & _IRQ_DESC_PERCPU) && + !irq_settings_is_per_cpu_devid(desc)) + return NULL; + + if (!(check & _IRQ_DESC_PERCPU) && + irq_settings_is_per_cpu_devid(desc)) + return NULL; + } + + if (bus) + chip_bus_lock(desc); + raw_spin_lock_irqsave(&desc->lock, *flags); + } + return desc; +} + +void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus) + __releases(&desc->lock) +{ + raw_spin_unlock_irqrestore(&desc->lock, flags); + if (bus) + chip_bus_sync_unlock(desc); +} + +int irq_set_percpu_devid_partition(unsigned int irq, + const struct cpumask *affinity) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + + if (desc->percpu_enabled) + return -EINVAL; + + desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL); + + if (!desc->percpu_enabled) + return -ENOMEM; + + if (affinity) + desc->percpu_affinity = affinity; + else + desc->percpu_affinity = cpu_possible_mask; + + irq_set_percpu_devid_flags(irq); + return 0; +} + +int irq_set_percpu_devid(unsigned int irq) +{ + return irq_set_percpu_devid_partition(irq, NULL); +} + +int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->percpu_enabled) + return -EINVAL; + + if (affinity) + cpumask_copy(affinity, desc->percpu_affinity); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition); + +void kstat_incr_irq_this_cpu(unsigned int irq) +{ + kstat_incr_irqs_this_cpu(irq_to_desc(irq)); +} + +/** + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu + * @irq: The interrupt number + * @cpu: The cpu number + * + * Returns the sum of interrupt counts on @cpu since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ +unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return desc && desc->kstat_irqs ? + *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; +} + +static bool irq_is_nmi(struct irq_desc *desc) +{ + return desc->istate & IRQS_NMI; +} + +static unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned int sum = 0; + int cpu; + + if (!desc || !desc->kstat_irqs) + return 0; + if (!irq_settings_is_per_cpu_devid(desc) && + !irq_settings_is_per_cpu(desc) && + !irq_is_nmi(desc)) + return data_race(desc->tot_count); + + for_each_possible_cpu(cpu) + sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); + return sum; +} + +/** + * kstat_irqs_usr - Get the statistics for an interrupt from thread context + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for @irq. + * + * It uses rcu to protect the access since a concurrent removal of an + * interrupt descriptor is observing an rcu grace period before + * delayed_free_desc()/irq_kobj_release(). + */ +unsigned int kstat_irqs_usr(unsigned int irq) +{ + unsigned int sum; + + rcu_read_lock(); + sum = kstat_irqs(irq); + rcu_read_unlock(); + return sum; +} + +#ifdef CONFIG_LOCKDEP +void __irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } +} +EXPORT_SYMBOL_GPL(__irq_set_lockdep_class); +#endif diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c new file mode 100644 index 0000000000..0bdef4fe92 --- /dev/null +++ b/kernel/irq/irqdomain.c @@ -0,0 +1,1978 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "irq: " fmt + +#include <linux/acpi.h> +#include <linux/debugfs.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/irqdomain.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/topology.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/fs.h> + +static LIST_HEAD(irq_domain_list); +static DEFINE_MUTEX(irq_domain_mutex); + +static struct irq_domain *irq_default_domain; + +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity); +static void irq_domain_check_hierarchy(struct irq_domain *domain); + +struct irqchip_fwid { + struct fwnode_handle fwnode; + unsigned int type; + char *name; + phys_addr_t *pa; +}; + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static void debugfs_add_domain_dir(struct irq_domain *d); +static void debugfs_remove_domain_dir(struct irq_domain *d); +#else +static inline void debugfs_add_domain_dir(struct irq_domain *d) { } +static inline void debugfs_remove_domain_dir(struct irq_domain *d) { } +#endif + +static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode) +{ + struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + return fwid->name; +} + +const struct fwnode_operations irqchip_fwnode_ops = { + .get_name = irqchip_fwnode_get_name, +}; +EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); + +/** + * __irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for + * identifying an irq domain + * @type: Type of irqchip_fwnode. See linux/irqdomain.h + * @id: Optional user provided id if name != NULL + * @name: Optional user provided domain name + * @pa: Optional user-provided physical address + * + * Allocate a struct irqchip_fwid, and return a pointer to the embedded + * fwnode_handle (or NULL on failure). + * + * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are + * solely to transport name information to irqdomain creation code. The + * node is not stored. For other types the pointer is kept in the irq + * domain struct. + */ +struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, + phys_addr_t *pa) +{ + struct irqchip_fwid *fwid; + char *n; + + fwid = kzalloc(sizeof(*fwid), GFP_KERNEL); + + switch (type) { + case IRQCHIP_FWNODE_NAMED: + n = kasprintf(GFP_KERNEL, "%s", name); + break; + case IRQCHIP_FWNODE_NAMED_ID: + n = kasprintf(GFP_KERNEL, "%s-%d", name, id); + break; + default: + n = kasprintf(GFP_KERNEL, "irqchip@%pa", pa); + break; + } + + if (!fwid || !n) { + kfree(fwid); + kfree(n); + return NULL; + } + + fwid->type = type; + fwid->name = n; + fwid->pa = pa; + fwnode_init(&fwid->fwnode, &irqchip_fwnode_ops); + return &fwid->fwnode; +} +EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); + +/** + * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle + * + * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. + */ +void irq_domain_free_fwnode(struct fwnode_handle *fwnode) +{ + struct irqchip_fwid *fwid; + + if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) + return; + + fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + kfree(fwid->name); + kfree(fwid); +} +EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); + +static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + unsigned int size, + irq_hw_number_t hwirq_max, + int direct_max, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irqchip_fwid *fwid; + struct irq_domain *domain; + + static atomic_t unknown_domains; + + if (WARN_ON((size && direct_max) || + (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max) || + (direct_max && (direct_max != hwirq_max)))) + return NULL; + + domain = kzalloc_node(struct_size(domain, revmap, size), + GFP_KERNEL, of_node_to_nid(to_of_node(fwnode))); + if (!domain) + return NULL; + + if (is_fwnode_irqchip(fwnode)) { + fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + switch (fwid->type) { + case IRQCHIP_FWNODE_NAMED: + case IRQCHIP_FWNODE_NAMED_ID: + domain->fwnode = fwnode; + domain->name = kstrdup(fwid->name, GFP_KERNEL); + if (!domain->name) { + kfree(domain); + return NULL; + } + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + break; + default: + domain->fwnode = fwnode; + domain->name = fwid->name; + break; + } + } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || + is_software_node(fwnode)) { + char *name; + + /* + * fwnode paths contain '/', which debugfs is legitimately + * unhappy about. Replace them with ':', which does + * the trick and is not as offensive as '\'... + */ + name = kasprintf(GFP_KERNEL, "%pfw", fwnode); + if (!name) { + kfree(domain); + return NULL; + } + + domain->name = strreplace(name, '/', ':'); + domain->fwnode = fwnode; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + + if (!domain->name) { + if (fwnode) + pr_err("Invalid fwnode type for irqdomain\n"); + domain->name = kasprintf(GFP_KERNEL, "unknown-%d", + atomic_inc_return(&unknown_domains)); + if (!domain->name) { + kfree(domain); + return NULL; + } + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + + fwnode_handle_get(fwnode); + fwnode_dev_initialized(fwnode, true); + + /* Fill structure */ + INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); + domain->ops = ops; + domain->host_data = host_data; + domain->hwirq_max = hwirq_max; + + if (direct_max) + domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP; + + domain->revmap_size = size; + + /* + * Hierarchical domains use the domain lock of the root domain + * (innermost domain). + * + * For non-hierarchical domains (as for root domains), the root + * pointer is set to the domain itself so that &domain->root->mutex + * always points to the right lock. + */ + mutex_init(&domain->mutex); + domain->root = domain; + + irq_domain_check_hierarchy(domain); + + return domain; +} + +static void __irq_domain_publish(struct irq_domain *domain) +{ + mutex_lock(&irq_domain_mutex); + debugfs_add_domain_dir(domain); + list_add(&domain->link, &irq_domain_list); + mutex_unlock(&irq_domain_mutex); + + pr_debug("Added domain %s\n", domain->name); +} + +/** + * __irq_domain_add() - Allocate a new irq_domain data structure + * @fwnode: firmware node for the interrupt controller + * @size: Size of linear map; 0 for radix mapping only + * @hwirq_max: Maximum number of interrupts supported by controller + * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no + * direct mapping + * @ops: domain callbacks + * @host_data: Controller private data pointer + * + * Allocates and initializes an irq_domain structure. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + + domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max, + ops, host_data); + if (domain) + __irq_domain_publish(domain); + + return domain; +} +EXPORT_SYMBOL_GPL(__irq_domain_add); + +/** + * irq_domain_remove() - Remove an irq domain. + * @domain: domain to remove + * + * This routine is used to remove an irq domain. The caller must ensure + * that all mappings within the domain have been disposed of prior to + * use, depending on the revmap type. + */ +void irq_domain_remove(struct irq_domain *domain) +{ + mutex_lock(&irq_domain_mutex); + debugfs_remove_domain_dir(domain); + + WARN_ON(!radix_tree_empty(&domain->revmap_tree)); + + list_del(&domain->link); + + /* + * If the going away domain is the default one, reset it. + */ + if (unlikely(irq_default_domain == domain)) + irq_set_default_host(NULL); + + mutex_unlock(&irq_domain_mutex); + + pr_debug("Removed domain %s\n", domain->name); + + fwnode_dev_initialized(domain->fwnode, false); + fwnode_handle_put(domain->fwnode); + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); + kfree(domain); +} +EXPORT_SYMBOL_GPL(irq_domain_remove); + +void irq_domain_update_bus_token(struct irq_domain *domain, + enum irq_domain_bus_token bus_token) +{ + char *name; + + if (domain->bus_token == bus_token) + return; + + mutex_lock(&irq_domain_mutex); + + domain->bus_token = bus_token; + + name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token); + if (!name) { + mutex_unlock(&irq_domain_mutex); + return; + } + + debugfs_remove_domain_dir(domain); + + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); + else + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + + domain->name = name; + debugfs_add_domain_dir(domain); + + mutex_unlock(&irq_domain_mutex); +} +EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); + +/** + * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs + * @fwnode: firmware node for the interrupt controller + * @size: total number of irqs in mapping + * @first_irq: first number of irq block assigned to the domain, + * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then + * pre-map all of the irqs in the domain to virqs starting at first_irq. + * @ops: domain callbacks + * @host_data: Controller private data pointer + * + * Allocates an irq_domain, and optionally if first_irq is positive then also + * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. + * + * This is intended to implement the expected behaviour for most + * interrupt controllers. If device tree is used, then first_irq will be 0 and + * irqs get mapped dynamically on the fly. However, if the controller requires + * static virq assignments (non-DT boot) then it will set that up correctly. + */ +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + + domain = __irq_domain_add(fwnode, size, size, 0, ops, host_data); + if (!domain) + return NULL; + + if (first_irq > 0) { + if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { + /* attempt to allocated irq_descs */ + int rc = irq_alloc_descs(first_irq, first_irq, size, + of_node_to_nid(to_of_node(fwnode))); + if (rc < 0) + pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", + first_irq); + } + irq_domain_associate_many(domain, first_irq, 0, size); + } + + return domain; +} +EXPORT_SYMBOL_GPL(irq_domain_create_simple); + +/** + * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @size: total number of irqs in legacy mapping + * @first_irq: first number of irq block assigned to the domain + * @first_hwirq: first hwirq number to use for the translation. Should normally + * be '0', but a positive integer can be used if the effective + * hwirqs numbering does not begin at zero. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Note: the map() callback will be called before this function returns + * for all legacy interrupts except 0 (which is always the invalid irq for + * a legacy controller). + */ +struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data) +{ + return irq_domain_create_legacy(of_node_to_fwnode(of_node), size, + first_irq, first_hwirq, ops, host_data); +} +EXPORT_SYMBOL_GPL(irq_domain_add_legacy); + +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + + domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data); + if (domain) + irq_domain_associate_many(domain, first_irq, first_hwirq, size); + + return domain; +} +EXPORT_SYMBOL_GPL(irq_domain_create_legacy); + +/** + * irq_find_matching_fwspec() - Locates a domain for a given fwspec + * @fwspec: FW specifier for an interrupt + * @bus_token: domain-specific data + */ +struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + struct irq_domain *h, *found = NULL; + struct fwnode_handle *fwnode = fwspec->fwnode; + int rc; + + /* We might want to match the legacy controller last since + * it might potentially be set to match all interrupts in + * the absence of a device node. This isn't a problem so far + * yet though... + * + * bus_token == DOMAIN_BUS_ANY matches any domain, any other + * values must generate an exact match for the domain to be + * selected. + */ + mutex_lock(&irq_domain_mutex); + list_for_each_entry(h, &irq_domain_list, link) { + if (h->ops->select && fwspec->param_count) + rc = h->ops->select(h, fwspec, bus_token); + else if (h->ops->match) + rc = h->ops->match(h, to_of_node(fwnode), bus_token); + else + rc = ((fwnode != NULL) && (h->fwnode == fwnode) && + ((bus_token == DOMAIN_BUS_ANY) || + (h->bus_token == bus_token))); + + if (rc) { + found = h; + break; + } + } + mutex_unlock(&irq_domain_mutex); + return found; +} +EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); + +/** + * irq_set_default_host() - Set a "default" irq domain + * @domain: default domain pointer + * + * For convenience, it's possible to set a "default" domain that will be used + * whenever NULL is passed to irq_create_mapping(). It makes life easier for + * platforms that want to manipulate a few hard coded interrupt numbers that + * aren't properly represented in the device-tree. + */ +void irq_set_default_host(struct irq_domain *domain) +{ + pr_debug("Default domain set to @0x%p\n", domain); + + irq_default_domain = domain; +} +EXPORT_SYMBOL_GPL(irq_set_default_host); + +/** + * irq_get_default_host() - Retrieve the "default" irq domain + * + * Returns: the default domain, if any. + * + * Modern code should never use this. This should only be used on + * systems that cannot implement a firmware->fwnode mapping (which + * both DT and ACPI provide). + */ +struct irq_domain *irq_get_default_host(void) +{ + return irq_default_domain; +} +EXPORT_SYMBOL_GPL(irq_get_default_host); + +static bool irq_domain_is_nomap(struct irq_domain *domain) +{ + return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && + (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP); +} + +static void irq_domain_clear_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq) +{ + lockdep_assert_held(&domain->root->mutex); + + if (irq_domain_is_nomap(domain)) + return; + + if (hwirq < domain->revmap_size) + rcu_assign_pointer(domain->revmap[hwirq], NULL); + else + radix_tree_delete(&domain->revmap_tree, hwirq); +} + +static void irq_domain_set_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq, + struct irq_data *irq_data) +{ + /* + * This also makes sure that all domains point to the same root when + * called from irq_domain_insert_irq() for each domain in a hierarchy. + */ + lockdep_assert_held(&domain->root->mutex); + + if (irq_domain_is_nomap(domain)) + return; + + if (hwirq < domain->revmap_size) + rcu_assign_pointer(domain->revmap[hwirq], irq_data); + else + radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); +} + +static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) +{ + struct irq_data *irq_data = irq_get_irq_data(irq); + irq_hw_number_t hwirq; + + if (WARN(!irq_data || irq_data->domain != domain, + "virq%i doesn't exist; cannot disassociate\n", irq)) + return; + + hwirq = irq_data->hwirq; + + mutex_lock(&domain->root->mutex); + + irq_set_status_flags(irq, IRQ_NOREQUEST); + + /* remove chip and handler */ + irq_set_chip_and_handler(irq, NULL, NULL); + + /* Make sure it's completed */ + synchronize_irq(irq); + + /* Tell the PIC about it */ + if (domain->ops->unmap) + domain->ops->unmap(domain, irq); + smp_mb(); + + irq_data->domain = NULL; + irq_data->hwirq = 0; + domain->mapcount--; + + /* Clear reverse map for this hwirq */ + irq_domain_clear_mapping(domain, hwirq); + + mutex_unlock(&domain->root->mutex); +} + +static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + int ret; + + if (WARN(hwirq >= domain->hwirq_max, + "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) + return -EINVAL; + if (WARN(!irq_data, "error: virq%i is not allocated", virq)) + return -EINVAL; + if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) + return -EINVAL; + + irq_data->hwirq = hwirq; + irq_data->domain = domain; + if (domain->ops->map) { + ret = domain->ops->map(domain, virq, hwirq); + if (ret != 0) { + /* + * If map() returns -EPERM, this interrupt is protected + * by the firmware or some other service and shall not + * be mapped. Don't bother telling the user about it. + */ + if (ret != -EPERM) { + pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", + domain->name, hwirq, virq, ret); + } + irq_data->domain = NULL; + irq_data->hwirq = 0; + return ret; + } + } + + domain->mapcount++; + irq_domain_set_mapping(domain, hwirq, irq_data); + + irq_clear_status_flags(virq, IRQ_NOREQUEST); + + return 0; +} + +int irq_domain_associate(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + int ret; + + mutex_lock(&domain->root->mutex); + ret = irq_domain_associate_locked(domain, virq, hwirq); + mutex_unlock(&domain->root->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(irq_domain_associate); + +void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, + irq_hw_number_t hwirq_base, int count) +{ + struct device_node *of_node; + int i; + + of_node = irq_domain_get_of_node(domain); + pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, + of_node_full_name(of_node), irq_base, (int)hwirq_base, count); + + for (i = 0; i < count; i++) + irq_domain_associate(domain, irq_base + i, hwirq_base + i); +} +EXPORT_SYMBOL_GPL(irq_domain_associate_many); + +#ifdef CONFIG_IRQ_DOMAIN_NOMAP +/** + * irq_create_direct_mapping() - Allocate an irq for direct mapping + * @domain: domain to allocate the irq for or NULL for default domain + * + * This routine is used for irq controllers which can choose the hardware + * interrupt numbers they generate. In such a case it's simplest to use + * the linux irq as the hardware interrupt number. It still uses the linear + * or radix tree to store the mapping, but the irq controller can optimize + * the revmap path by using the hwirq directly. + */ +unsigned int irq_create_direct_mapping(struct irq_domain *domain) +{ + struct device_node *of_node; + unsigned int virq; + + if (domain == NULL) + domain = irq_default_domain; + + of_node = irq_domain_get_of_node(domain); + virq = irq_alloc_desc_from(1, of_node_to_nid(of_node)); + if (!virq) { + pr_debug("create_direct virq allocation failed\n"); + return 0; + } + if (virq >= domain->hwirq_max) { + pr_err("ERROR: no free irqs available below %lu maximum\n", + domain->hwirq_max); + irq_free_desc(virq); + return 0; + } + pr_debug("create_direct obtained virq %d\n", virq); + + if (irq_domain_associate(domain, virq, virq)) { + irq_free_desc(virq); + return 0; + } + + return virq; +} +EXPORT_SYMBOL_GPL(irq_create_direct_mapping); +#endif + +static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) +{ + struct device_node *of_node = irq_domain_get_of_node(domain); + int virq; + + pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + + /* Allocate a virtual interrupt number */ + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), + affinity); + if (virq <= 0) { + pr_debug("-> virq allocation failed\n"); + return 0; + } + + if (irq_domain_associate_locked(domain, virq, hwirq)) { + irq_free_desc(virq); + return 0; + } + + pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", + hwirq, of_node_full_name(of_node), virq); + + return virq; +} + +/** + * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space + * @affinity: irq affinity + * + * Only one mapping per hardware interrupt is permitted. Returns a linux + * irq number. + * If the sense/trigger is to be specified, set_irq_type() should be called + * on the number returned from that call. + */ +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) +{ + int virq; + + /* Look for default domain if necessary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) { + WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); + return 0; + } + + mutex_lock(&domain->root->mutex); + + /* Check if mapping already exists */ + virq = irq_find_mapping(domain, hwirq); + if (virq) { + pr_debug("existing mapping on virq %d\n", virq); + goto out; + } + + virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); +out: + mutex_unlock(&domain->root->mutex); + + return virq; +} +EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); + +static int irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type) +{ +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (d->ops->translate) + return d->ops->translate(d, fwspec, hwirq, type); +#endif + if (d->ops->xlate) + return d->ops->xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); + + /* If domain has no translation, then we assume interrupt line */ + *hwirq = fwspec->param[0]; + return 0; +} + +void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec) +{ + int i; + + fwspec->fwnode = of_node_to_fwnode(np); + fwspec->param_count = count; + + for (i = 0; i < count; i++) + fwspec->param[i] = args[i]; +} +EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); + +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) +{ + struct irq_domain *domain; + struct irq_data *irq_data; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int virq; + + if (fwspec->fwnode) { + domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); + if (!domain) + domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_ANY); + } else { + domain = irq_default_domain; + } + + if (!domain) { + pr_warn("no irq domain found for %s !\n", + of_node_full_name(to_of_node(fwspec->fwnode))); + return 0; + } + + if (irq_domain_translate(domain, fwspec, &hwirq, &type)) + return 0; + + /* + * WARN if the irqchip returns a type with bits + * outside the sense mask set and clear these bits. + */ + if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) + type &= IRQ_TYPE_SENSE_MASK; + + mutex_lock(&domain->root->mutex); + + /* + * If we've already configured this interrupt, + * don't do it again, or hell will break loose. + */ + virq = irq_find_mapping(domain, hwirq); + if (virq) { + /* + * If the trigger type is not specified or matches the + * current trigger type then we are done so return the + * interrupt number. + */ + if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) + goto out; + + /* + * If the trigger type has not been set yet, then set + * it now and return the interrupt number. + */ + if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { + irq_data = irq_get_irq_data(virq); + if (!irq_data) { + virq = 0; + goto out; + } + + irqd_set_trigger_type(irq_data, type); + goto out; + } + + pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", + hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); + virq = 0; + goto out; + } + + if (irq_domain_is_hierarchy(domain)) { + virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, + fwspec, false, NULL); + if (virq <= 0) { + virq = 0; + goto out; + } + } else { + /* Create mapping */ + virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); + if (!virq) + goto out; + } + + irq_data = irq_get_irq_data(virq); + if (WARN_ON(!irq_data)) { + virq = 0; + goto out; + } + + /* Store trigger type */ + irqd_set_trigger_type(irq_data, type); +out: + mutex_unlock(&domain->root->mutex); + + return virq; +} +EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping); + +unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) +{ + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(irq_data->np, irq_data->args, + irq_data->args_count, &fwspec); + + return irq_create_fwspec_mapping(&fwspec); +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); + +/** + * irq_dispose_mapping() - Unmap an interrupt + * @virq: linux irq number of the interrupt to unmap + */ +void irq_dispose_mapping(unsigned int virq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_domain *domain; + + if (!virq || !irq_data) + return; + + domain = irq_data->domain; + if (WARN_ON(domain == NULL)) + return; + + if (irq_domain_is_hierarchy(domain)) { + irq_domain_free_irqs(virq, 1); + } else { + irq_domain_disassociate(domain, virq); + irq_free_desc(virq); + } +} +EXPORT_SYMBOL_GPL(irq_dispose_mapping); + +/** + * __irq_resolve_mapping() - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * @irq: optional pointer to return the Linux irq if required + * + * Returns the interrupt descriptor. + */ +struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq, + unsigned int *irq) +{ + struct irq_desc *desc = NULL; + struct irq_data *data; + + /* Look for default domain if necessary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) + return desc; + + if (irq_domain_is_nomap(domain)) { + if (hwirq < domain->hwirq_max) { + data = irq_domain_get_irq_data(domain, hwirq); + if (data && data->hwirq == hwirq) + desc = irq_data_to_desc(data); + if (irq && desc) + *irq = hwirq; + } + + return desc; + } + + rcu_read_lock(); + /* Check if the hwirq is in the linear revmap. */ + if (hwirq < domain->revmap_size) + data = rcu_dereference(domain->revmap[hwirq]); + else + data = radix_tree_lookup(&domain->revmap_tree, hwirq); + + if (likely(data)) { + desc = irq_data_to_desc(data); + if (irq) + *irq = data->irq; + } + + rcu_read_unlock(); + return desc; +} +EXPORT_SYMBOL_GPL(__irq_resolve_mapping); + +/** + * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings + * + * Device Tree IRQ specifier translation function which works with one cell + * bindings where the cell value maps directly to the hwirq number. + */ +int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (WARN_ON(intsize < 1)) + return -EINVAL; + *out_hwirq = intspec[0]; + *out_type = IRQ_TYPE_NONE; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); + +/** + * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings + * + * Device Tree IRQ specifier translation function which works with two cell + * bindings where the cell values map directly to the hwirq number + * and linux irq flags. + */ +int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); + return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type); +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); + +/** + * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings + * + * Device Tree IRQ specifier translation function which works with either one + * or two cell bindings where the cell values map directly to the hwirq number + * and linux irq flags. + * + * Note: don't use this function unless your interrupt controller explicitly + * supports both one and two cell bindings. For the majority of controllers + * the _onecell() or _twocell() variants above should be used. + */ +int irq_domain_xlate_onetwocell(struct irq_domain *d, + struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (WARN_ON(intsize < 1)) + return -EINVAL; + *out_hwirq = intspec[0]; + if (intsize > 1) + *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; + else + *out_type = IRQ_TYPE_NONE; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); + +const struct irq_domain_ops irq_domain_simple_ops = { + .xlate = irq_domain_xlate_onetwocell, +}; +EXPORT_SYMBOL_GPL(irq_domain_simple_ops); + +/** + * irq_domain_translate_onecell() - Generic translate for direct one cell + * bindings + */ +int irq_domain_translate_onecell(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (WARN_ON(fwspec->param_count < 1)) + return -EINVAL; + *out_hwirq = fwspec->param[0]; + *out_type = IRQ_TYPE_NONE; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_translate_onecell); + +/** + * irq_domain_translate_twocell() - Generic translate for direct two cell + * bindings + * + * Device Tree IRQ specifier translation function which works with two cell + * bindings where the cell values map directly to the hwirq number + * and linux irq flags. + */ +int irq_domain_translate_twocell(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (WARN_ON(fwspec->param_count < 2)) + return -EINVAL; + *out_hwirq = fwspec->param[0]; + *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); + +int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, + int node, const struct irq_affinity_desc *affinity) +{ + unsigned int hint; + + if (virq >= 0) { + virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, + affinity); + } else { + hint = hwirq % nr_irqs; + if (hint == 0) + hint++; + virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, + affinity); + if (virq <= 0 && hint > 1) { + virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, + affinity); + } + } + + return virq; +} + +/** + * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data + * @irq_data: The pointer to irq_data + */ +void irq_domain_reset_irq_data(struct irq_data *irq_data) +{ + irq_data->hwirq = 0; + irq_data->chip = &no_irq_chip; + irq_data->chip_data = NULL; +} +EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +/** + * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy + * @parent: Parent irq domain to associate with the new domain + * @flags: Irq domain flags associated to the domain + * @size: Size of the domain. See below + * @fwnode: Optional fwnode of the interrupt controller + * @ops: Pointer to the interrupt domain callbacks + * @host_data: Controller private data pointer + * + * If @size is 0 a tree domain is created, otherwise a linear domain. + * + * If successful the parent is associated to the new domain and the + * domain flags are set. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + unsigned int flags, + unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + + if (size) + domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data); + else + domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data); + + if (domain) { + if (parent) + domain->root = parent->root; + domain->parent = parent; + domain->flags |= flags; + + __irq_domain_publish(domain); + } + + return domain; +} +EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); + +static void irq_domain_insert_irq(int virq) +{ + struct irq_data *data; + + for (data = irq_get_irq_data(virq); data; data = data->parent_data) { + struct irq_domain *domain = data->domain; + + domain->mapcount++; + irq_domain_set_mapping(domain, data->hwirq, data); + } + + irq_clear_status_flags(virq, IRQ_NOREQUEST); +} + +static void irq_domain_remove_irq(int virq) +{ + struct irq_data *data; + + irq_set_status_flags(virq, IRQ_NOREQUEST); + irq_set_chip_and_handler(virq, NULL, NULL); + synchronize_irq(virq); + smp_mb(); + + for (data = irq_get_irq_data(virq); data; data = data->parent_data) { + struct irq_domain *domain = data->domain; + irq_hw_number_t hwirq = data->hwirq; + + domain->mapcount--; + irq_domain_clear_mapping(domain, hwirq); + } +} + +static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, + struct irq_data *child) +{ + struct irq_data *irq_data; + + irq_data = kzalloc_node(sizeof(*irq_data), GFP_KERNEL, + irq_data_get_node(child)); + if (irq_data) { + child->parent_data = irq_data; + irq_data->irq = child->irq; + irq_data->common = child->common; + irq_data->domain = domain; + } + + return irq_data; +} + +static void __irq_domain_free_hierarchy(struct irq_data *irq_data) +{ + struct irq_data *tmp; + + while (irq_data) { + tmp = irq_data; + irq_data = irq_data->parent_data; + kfree(tmp); + } +} + +static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data, *tmp; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_get_irq_data(virq + i); + tmp = irq_data->parent_data; + irq_data->parent_data = NULL; + irq_data->domain = NULL; + + __irq_domain_free_hierarchy(tmp); + } +} + +/** + * irq_domain_disconnect_hierarchy - Mark the first unused level of a hierarchy + * @domain: IRQ domain from which the hierarchy is to be disconnected + * @virq: IRQ number where the hierarchy is to be trimmed + * + * Marks the @virq level belonging to @domain as disconnected. + * Returns -EINVAL if @virq doesn't have a valid irq_data pointing + * to @domain. + * + * Its only use is to be able to trim levels of hierarchy that do not + * have any real meaning for this interrupt, and that the driver marks + * as such from its .alloc() callback. + */ +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, + unsigned int virq) +{ + struct irq_data *irqd; + + irqd = irq_domain_get_irq_data(domain, virq); + if (!irqd) + return -EINVAL; + + irqd->chip = ERR_PTR(-ENOTCONN); + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy); + +static int irq_domain_trim_hierarchy(unsigned int virq) +{ + struct irq_data *tail, *irqd, *irq_data; + + irq_data = irq_get_irq_data(virq); + tail = NULL; + + /* The first entry must have a valid irqchip */ + if (!irq_data->chip || IS_ERR(irq_data->chip)) + return -EINVAL; + + /* + * Validate that the irq_data chain is sane in the presence of + * a hierarchy trimming marker. + */ + for (irqd = irq_data->parent_data; irqd; irq_data = irqd, irqd = irqd->parent_data) { + /* Can't have a valid irqchip after a trim marker */ + if (irqd->chip && tail) + return -EINVAL; + + /* Can't have an empty irqchip before a trim marker */ + if (!irqd->chip && !tail) + return -EINVAL; + + if (IS_ERR(irqd->chip)) { + /* Only -ENOTCONN is a valid trim marker */ + if (PTR_ERR(irqd->chip) != -ENOTCONN) + return -EINVAL; + + tail = irq_data; + } + } + + /* No trim marker, nothing to do */ + if (!tail) + return 0; + + pr_info("IRQ%d: trimming hierarchy from %s\n", + virq, tail->parent_data->domain->name); + + /* Sever the inner part of the hierarchy... */ + irqd = tail; + tail = tail->parent_data; + irqd->parent_data = NULL; + __irq_domain_free_hierarchy(tail); + + return 0; +} + +static int irq_domain_alloc_irq_data(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *irq_data; + struct irq_domain *parent; + int i; + + /* The outermost irq_data is embedded in struct irq_desc */ + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_get_irq_data(virq + i); + irq_data->domain = domain; + + for (parent = domain->parent; parent; parent = parent->parent) { + irq_data = irq_domain_insert_irq_data(parent, irq_data); + if (!irq_data) { + irq_domain_free_irq_data(virq, i + 1); + return -ENOMEM; + } + } + } + + return 0; +} + +/** + * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain + * @domain: domain to match + * @virq: IRQ number to get irq_data + */ +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, + unsigned int virq) +{ + struct irq_data *irq_data; + + for (irq_data = irq_get_irq_data(virq); irq_data; + irq_data = irq_data->parent_data) + if (irq_data->domain == domain) + return irq_data; + + return NULL; +} +EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); + +/** + * irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain + * @domain: Interrupt domain to match + * @virq: IRQ number + * @hwirq: The hwirq number + * @chip: The associated interrupt chip + * @chip_data: The associated chip data + */ +int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, + const struct irq_chip *chip, + void *chip_data) +{ + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); + + if (!irq_data) + return -ENOENT; + + irq_data->hwirq = hwirq; + irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); + irq_data->chip_data = chip_data; + + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); + +/** + * irq_domain_set_info - Set the complete data for a @virq in @domain + * @domain: Interrupt domain to match + * @virq: IRQ number + * @hwirq: The hardware interrupt number + * @chip: The associated interrupt chip + * @chip_data: The associated interrupt chip data + * @handler: The interrupt flow handler + * @handler_data: The interrupt flow handler data + * @handler_name: The interrupt handler name + */ +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, const struct irq_chip *chip, + void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name) +{ + irq_domain_set_hwirq_and_chip(domain, virq, hwirq, chip, chip_data); + __irq_set_handler(virq, handler, 0, handler_name); + irq_set_handler_data(virq, handler_data); +} +EXPORT_SYMBOL(irq_domain_set_info); + +/** + * irq_domain_free_irqs_common - Clear irq_data and free the parent + * @domain: Interrupt domain to match + * @virq: IRQ number to start with + * @nr_irqs: The number of irqs to free + */ +void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *irq_data; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + if (irq_data) + irq_domain_reset_irq_data(irq_data); + } + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} +EXPORT_SYMBOL_GPL(irq_domain_free_irqs_common); + +/** + * irq_domain_free_irqs_top - Clear handler and handler data, clear irqdata and free parent + * @domain: Interrupt domain to match + * @virq: IRQ number to start with + * @nr_irqs: The number of irqs to free + */ +void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_set_handler_data(virq + i, NULL); + irq_set_handler(virq + i, NULL); + } + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, + unsigned int irq_base, + unsigned int nr_irqs) +{ + unsigned int i; + + if (!domain->ops->free) + return; + + for (i = 0; i < nr_irqs; i++) { + if (irq_domain_get_irq_data(domain, irq_base + i)) + domain->ops->free(domain, irq_base + i, 1); + } +} + +int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, + unsigned int irq_base, + unsigned int nr_irqs, void *arg) +{ + if (!domain->ops->alloc) { + pr_debug("domain->ops->alloc() is NULL\n"); + return -ENOSYS; + } + + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); +} + +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) +{ + int i, ret, virq; + + if (realloc && irq_base >= 0) { + virq = irq_base; + } else { + virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, + affinity); + if (virq < 0) { + pr_debug("cannot allocate IRQ(base %d, count %d)\n", + irq_base, nr_irqs); + return virq; + } + } + + if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) { + pr_debug("cannot allocate memory for IRQ%d\n", virq); + ret = -ENOMEM; + goto out_free_desc; + } + + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); + if (ret < 0) + goto out_free_irq_data; + + for (i = 0; i < nr_irqs; i++) { + ret = irq_domain_trim_hierarchy(virq + i); + if (ret) + goto out_free_irq_data; + } + + for (i = 0; i < nr_irqs; i++) + irq_domain_insert_irq(virq + i); + + return virq; + +out_free_irq_data: + irq_domain_free_irq_data(virq, nr_irqs); +out_free_desc: + irq_free_descs(virq, nr_irqs); + return ret; +} + +/** + * __irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @irq_base: allocate specified IRQ number if irq_base >= 0 + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices + * + * Allocate IRQ numbers and initialized all data structures to support + * hierarchy IRQ domains. + * Parameter @realloc is mainly to support legacy IRQs. + * Returns error code or allocated IRQ number + * + * The whole process to setup an IRQ has been split into two steps. + * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ + * descriptor and required hardware resources. The second step, + * irq_domain_activate_irq(), is to program the hardware with preallocated + * resources. In this way, it's easier to rollback when failing to + * allocate resources. + */ +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) +{ + int ret; + + if (domain == NULL) { + domain = irq_default_domain; + if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) + return -EINVAL; + } + + mutex_lock(&domain->root->mutex); + ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, + realloc, affinity); + mutex_unlock(&domain->root->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); + +/* The irq_data was moved, fix the revmap to refer to the new location */ +static void irq_domain_fix_revmap(struct irq_data *d) +{ + void __rcu **slot; + + lockdep_assert_held(&d->domain->root->mutex); + + if (irq_domain_is_nomap(d->domain)) + return; + + /* Fix up the revmap. */ + if (d->hwirq < d->domain->revmap_size) { + /* Not using radix tree */ + rcu_assign_pointer(d->domain->revmap[d->hwirq], d); + } else { + slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq); + if (slot) + radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); + } +} + +/** + * irq_domain_push_irq() - Push a domain in to the top of a hierarchy. + * @domain: Domain to push. + * @virq: Irq to push the domain in to. + * @arg: Passed to the irq_domain_ops alloc() function. + * + * For an already existing irqdomain hierarchy, as might be obtained + * via a call to pci_enable_msix(), add an additional domain to the + * head of the processing chain. Must be called before request_irq() + * has been called. + */ +int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; + struct irq_desc *desc; + int rv = 0; + + /* + * Check that no action has been set, which indicates the virq + * is in a state where this function doesn't have to deal with + * races between interrupt handling and maintaining the + * hierarchy. This will catch gross misuse. Attempting to + * make the check race free would require holding locks across + * calls to struct irq_domain_ops->alloc(), which could lead + * to deadlock, so we just do a simple check before starting. + */ + desc = irq_to_desc(virq); + if (!desc) + return -EINVAL; + if (WARN_ON(desc->action)) + return -EBUSY; + + if (domain == NULL) + return -EINVAL; + + if (WARN_ON(!irq_domain_is_hierarchy(domain))) + return -EINVAL; + + if (!irq_data) + return -EINVAL; + + if (domain->parent != irq_data->domain) + return -EINVAL; + + parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL, + irq_data_get_node(irq_data)); + if (!parent_irq_data) + return -ENOMEM; + + mutex_lock(&domain->root->mutex); + + /* Copy the original irq_data. */ + *parent_irq_data = *irq_data; + + /* + * Overwrite the irq_data, which is embedded in struct irq_desc, with + * values for this domain. + */ + irq_data->parent_data = parent_irq_data; + irq_data->domain = domain; + irq_data->mask = 0; + irq_data->hwirq = 0; + irq_data->chip = NULL; + irq_data->chip_data = NULL; + + /* May (probably does) set hwirq, chip, etc. */ + rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); + if (rv) { + /* Restore the original irq_data. */ + *irq_data = *parent_irq_data; + kfree(parent_irq_data); + goto error; + } + + irq_domain_fix_revmap(parent_irq_data); + irq_domain_set_mapping(domain, irq_data->hwirq, irq_data); +error: + mutex_unlock(&domain->root->mutex); + + return rv; +} +EXPORT_SYMBOL_GPL(irq_domain_push_irq); + +/** + * irq_domain_pop_irq() - Remove a domain from the top of a hierarchy. + * @domain: Domain to remove. + * @virq: Irq to remove the domain from. + * + * Undo the effects of a call to irq_domain_push_irq(). Must be + * called either before request_irq() or after free_irq(). + */ +int irq_domain_pop_irq(struct irq_domain *domain, int virq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; + struct irq_data *tmp_irq_data; + struct irq_desc *desc; + + /* + * Check that no action is set, which indicates the virq is in + * a state where this function doesn't have to deal with races + * between interrupt handling and maintaining the hierarchy. + * This will catch gross misuse. Attempting to make the check + * race free would require holding locks across calls to + * struct irq_domain_ops->free(), which could lead to + * deadlock, so we just do a simple check before starting. + */ + desc = irq_to_desc(virq); + if (!desc) + return -EINVAL; + if (WARN_ON(desc->action)) + return -EBUSY; + + if (domain == NULL) + return -EINVAL; + + if (!irq_data) + return -EINVAL; + + tmp_irq_data = irq_domain_get_irq_data(domain, virq); + + /* We can only "pop" if this domain is at the top of the list */ + if (WARN_ON(irq_data != tmp_irq_data)) + return -EINVAL; + + if (WARN_ON(irq_data->domain != domain)) + return -EINVAL; + + parent_irq_data = irq_data->parent_data; + if (WARN_ON(!parent_irq_data)) + return -EINVAL; + + mutex_lock(&domain->root->mutex); + + irq_data->parent_data = NULL; + + irq_domain_clear_mapping(domain, irq_data->hwirq); + irq_domain_free_irqs_hierarchy(domain, virq, 1); + + /* Restore the original irq_data. */ + *irq_data = *parent_irq_data; + + irq_domain_fix_revmap(irq_data); + + mutex_unlock(&domain->root->mutex); + + kfree(parent_irq_data); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_pop_irq); + +/** + * irq_domain_free_irqs - Free IRQ number and associated data structures + * @virq: base IRQ number + * @nr_irqs: number of IRQs to free + */ +void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *data = irq_get_irq_data(virq); + struct irq_domain *domain; + int i; + + if (WARN(!data || !data->domain || !data->domain->ops->free, + "NULL pointer, cannot free irq\n")) + return; + + domain = data->domain; + + mutex_lock(&domain->root->mutex); + for (i = 0; i < nr_irqs; i++) + irq_domain_remove_irq(virq + i); + irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs); + mutex_unlock(&domain->root->mutex); + + irq_domain_free_irq_data(virq, nr_irqs); + irq_free_descs(virq, nr_irqs); +} + +/** + * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain + * @domain: Domain below which interrupts must be allocated + * @irq_base: Base IRQ number + * @nr_irqs: Number of IRQs to allocate + * @arg: Allocation data (arch/domain specific) + */ +int irq_domain_alloc_irqs_parent(struct irq_domain *domain, + unsigned int irq_base, unsigned int nr_irqs, + void *arg) +{ + if (!domain->parent) + return -ENOSYS; + + return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base, + nr_irqs, arg); +} +EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); + +/** + * irq_domain_free_irqs_parent - Free interrupts from parent domain + * @domain: Domain below which interrupts must be freed + * @irq_base: Base IRQ number + * @nr_irqs: Number of IRQs to free + */ +void irq_domain_free_irqs_parent(struct irq_domain *domain, + unsigned int irq_base, unsigned int nr_irqs) +{ + if (!domain->parent) + return; + + irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs); +} +EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); + +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irq_data && irq_data->domain) { + struct irq_domain *domain = irq_data->domain; + + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); + if (irq_data->parent_data) + __irq_domain_deactivate_irq(irq_data->parent_data); + } +} + +static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) +{ + int ret = 0; + + if (irqd && irqd->domain) { + struct irq_domain *domain = irqd->domain; + + if (irqd->parent_data) + ret = __irq_domain_activate_irq(irqd->parent_data, + reserve); + if (!ret && domain->ops->activate) { + ret = domain->ops->activate(domain, irqd, reserve); + /* Rollback in case of error */ + if (ret && irqd->parent_data) + __irq_domain_deactivate_irq(irqd->parent_data); + } + } + return ret; +} + +/** + * irq_domain_activate_irq - Call domain_ops->activate recursively to activate + * interrupt + * @irq_data: Outermost irq_data associated with interrupt + * @reserve: If set only reserve an interrupt vector instead of assigning one + * + * This is the second step to call domain_ops->activate to program interrupt + * controllers, so the interrupt could actually get delivered. + */ +int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) +{ + int ret = 0; + + if (!irqd_is_activated(irq_data)) + ret = __irq_domain_activate_irq(irq_data, reserve); + if (!ret) + irqd_set_activated(irq_data); + return ret; +} + +/** + * irq_domain_deactivate_irq - Call domain_ops->deactivate recursively to + * deactivate interrupt + * @irq_data: outermost irq_data associated with interrupt + * + * It calls domain_ops->deactivate to program interrupt controllers to disable + * interrupt delivery. + */ +void irq_domain_deactivate_irq(struct irq_data *irq_data) +{ + if (irqd_is_activated(irq_data)) { + __irq_domain_deactivate_irq(irq_data); + irqd_clr_activated(irq_data); + } +} + +static void irq_domain_check_hierarchy(struct irq_domain *domain) +{ + /* Hierarchy irq_domains must implement callback alloc() */ + if (domain->ops->alloc) + domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; +} +#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +/** + * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain + * @domain: domain to match + * @virq: IRQ number to get irq_data + */ +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, + unsigned int virq) +{ + struct irq_data *irq_data = irq_get_irq_data(virq); + + return (irq_data && irq_data->domain == domain) ? irq_data : NULL; +} +EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); + +/** + * irq_domain_set_info - Set the complete data for a @virq in @domain + * @domain: Interrupt domain to match + * @virq: IRQ number + * @hwirq: The hardware interrupt number + * @chip: The associated interrupt chip + * @chip_data: The associated interrupt chip data + * @handler: The interrupt flow handler + * @handler_data: The interrupt flow handler data + * @handler_name: The interrupt handler name + */ +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, const struct irq_chip *chip, + void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name) +{ + irq_set_chip_and_handler_name(virq, chip, handler, handler_name); + irq_set_chip_data(virq, chip_data); + irq_set_handler_data(virq, handler_data); +} + +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) +{ + return -EINVAL; +} + +static void irq_domain_check_hierarchy(struct irq_domain *domain) +{ +} +#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +#include "internals.h" + +static struct dentry *domain_dir; + +static void +irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) +{ + seq_printf(m, "%*sname: %s\n", ind, "", d->name); + seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size); + seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); + seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); + if (d->ops && d->ops->debug_show) + d->ops->debug_show(m, d, NULL, ind + 1); +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (!d->parent) + return; + seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name); + irq_domain_debug_show_one(m, d->parent, ind + 4); +#endif +} + +static int irq_domain_debug_show(struct seq_file *m, void *p) +{ + struct irq_domain *d = m->private; + + /* Default domain? Might be NULL */ + if (!d) { + if (!irq_default_domain) + return 0; + d = irq_default_domain; + } + irq_domain_debug_show_one(m, d, 0); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); + +static void debugfs_add_domain_dir(struct irq_domain *d) +{ + if (!d->name || !domain_dir) + return; + debugfs_create_file(d->name, 0444, domain_dir, d, + &irq_domain_debug_fops); +} + +static void debugfs_remove_domain_dir(struct irq_domain *d) +{ + debugfs_lookup_and_remove(d->name, domain_dir); +} + +void __init irq_domain_debugfs_init(struct dentry *root) +{ + struct irq_domain *d; + + domain_dir = debugfs_create_dir("domains", root); + + debugfs_create_file("default", 0444, domain_dir, NULL, + &irq_domain_debug_fops); + mutex_lock(&irq_domain_mutex); + list_for_each_entry(d, &irq_domain_list, link) + debugfs_add_domain_dir(d); + mutex_unlock(&irq_domain_mutex); +} +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c new file mode 100644 index 0000000000..d309ba84e0 --- /dev/null +++ b/kernel/irq/manage.c @@ -0,0 +1,2942 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006 Thomas Gleixner + * + * This file contains driver APIs to the irq subsystem. + */ + +#define pr_fmt(fmt) "genirq: " fmt + +#include <linux/irq.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> +#include <linux/sched/task.h> +#include <linux/sched/isolation.h> +#include <uapi/linux/sched/types.h> +#include <linux/task_work.h> + +#include "internals.h" + +#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); + +static int __init setup_forced_irqthreads(char *arg) +{ + static_branch_enable(&force_irqthreads_key); + return 0; +} +early_param("threadirqs", setup_forced_irqthreads); +#endif + +static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) +{ + struct irq_data *irqd = irq_desc_get_irq_data(desc); + bool inprogress; + + do { + unsigned long flags; + + /* + * Wait until we're out of the critical section. This might + * give the wrong answer due to the lack of memory barriers. + */ + while (irqd_irq_inprogress(&desc->irq_data)) + cpu_relax(); + + /* Ok, that indicated we're done: double-check carefully. */ + raw_spin_lock_irqsave(&desc->lock, flags); + inprogress = irqd_irq_inprogress(&desc->irq_data); + + /* + * If requested and supported, check at the chip whether it + * is in flight at the hardware level, i.e. already pending + * in a CPU and waiting for service and acknowledge. + */ + if (!inprogress && sync_chip) { + /* + * Ignore the return code. inprogress is only updated + * when the chip supports it. + */ + __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, + &inprogress); + } + raw_spin_unlock_irqrestore(&desc->lock, flags); + + /* Oops, that failed? */ + } while (inprogress); +} + +/** + * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending hard IRQ handlers for this + * interrupt to complete before returning. If you use this + * function while holding a resource the IRQ handler may need you + * will deadlock. It does not take associated threaded handlers + * into account. + * + * Do not use this for shutdown scenarios where you must be sure + * that all parts (hardirq and threaded handler) have completed. + * + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. + * + * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when + * called with interrupts disabled and the target CPU of the interrupt + * is the current CPU. + */ +bool synchronize_hardirq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + __synchronize_hardirq(desc, false); + return !atomic_read(&desc->threads_active); + } + + return true; +} +EXPORT_SYMBOL(synchronize_hardirq); + +static void __synchronize_irq(struct irq_desc *desc) +{ + __synchronize_hardirq(desc, true); + /* + * We made sure that no hardirq handler is running. Now verify that no + * threaded handlers are active. + */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); +} + +/** + * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. + */ +void synchronize_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) + __synchronize_irq(desc); +} +EXPORT_SYMBOL(synchronize_irq); + +#ifdef CONFIG_SMP +cpumask_var_t irq_default_affinity; + +static bool __irq_can_set_affinity(struct irq_desc *desc) +{ + if (!desc || !irqd_can_balance(&desc->irq_data) || + !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity) + return false; + return true; +} + +/** + * irq_can_set_affinity - Check if the affinity of a given irq can be set + * @irq: Interrupt to check + * + */ +int irq_can_set_affinity(unsigned int irq) +{ + return __irq_can_set_affinity(irq_to_desc(irq)); +} + +/** + * irq_can_set_affinity_usr - Check if affinity of a irq can be set from user space + * @irq: Interrupt to check + * + * Like irq_can_set_affinity() above, but additionally checks for the + * AFFINITY_MANAGED flag. + */ +bool irq_can_set_affinity_usr(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + return __irq_can_set_affinity(desc) && + !irqd_affinity_is_managed(&desc->irq_data); +} + +/** + * irq_set_thread_affinity - Notify irq threads to adjust affinity + * @desc: irq descriptor which has affinity changed + * + * We just set IRQTF_AFFINITY and delegate the affinity setting + * to the interrupt thread itself. We can not call + * set_cpus_allowed_ptr() here as we hold desc->lock and this + * code can be called from hard interrupt context. + */ +void irq_set_thread_affinity(struct irq_desc *desc) +{ + struct irqaction *action; + + for_each_action_of_desc(desc, action) { + if (action->thread) + set_bit(IRQTF_AFFINITY, &action->thread_flags); + if (action->secondary && action->secondary->thread) + set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags); + } +} + +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static void irq_validate_effective_affinity(struct irq_data *data) +{ + const struct cpumask *m = irq_data_get_effective_affinity_mask(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (!cpumask_empty(m)) + return; + pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", + chip->name, data->irq); +} +#else +static inline void irq_validate_effective_affinity(struct irq_data *data) { } +#endif + +int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_desc *desc = irq_data_to_desc(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; + int ret; + + static DEFINE_RAW_SPINLOCK(tmp_mask_lock); + static struct cpumask tmp_mask; + + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + + raw_spin_lock(&tmp_mask_lock); + /* + * If this is a managed interrupt and housekeeping is enabled on + * it check whether the requested affinity mask intersects with + * a housekeeping CPU. If so, then remove the isolated CPUs from + * the mask and just keep the housekeeping CPU(s). This prevents + * the affinity setter from routing the interrupt to an isolated + * CPU to avoid that I/O submitted from a housekeeping CPU causes + * interrupts on an isolated one. + * + * If the masks do not intersect or include online CPU(s) then + * keep the requested mask. The isolated target CPUs are only + * receiving interrupts when the I/O operation was submitted + * directly from them. + * + * If all housekeeping CPUs in the affinity mask are offline, the + * interrupt will be migrated by the CPU hotplug code once a + * housekeeping CPU which belongs to the affinity mask comes + * online. + */ + if (irqd_affinity_is_managed(data) && + housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) { + const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); + + cpumask_and(&tmp_mask, mask, hk_mask); + if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) + prog_mask = mask; + else + prog_mask = &tmp_mask; + } else { + prog_mask = mask; + } + + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). + */ + cpumask_and(&tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(&tmp_mask)) + ret = chip->irq_set_affinity(data, &tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); + else + ret = -EINVAL; + + raw_spin_unlock(&tmp_mask_lock); + + switch (ret) { + case IRQ_SET_MASK_OK: + case IRQ_SET_MASK_OK_DONE: + cpumask_copy(desc->irq_common_data.affinity, mask); + fallthrough; + case IRQ_SET_MASK_OK_NOCOPY: + irq_validate_effective_affinity(data); + irq_set_thread_affinity(desc); + ret = 0; + } + + return ret; +} + +#ifdef CONFIG_GENERIC_PENDING_IRQ +static inline int irq_set_affinity_pending(struct irq_data *data, + const struct cpumask *dest) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + irqd_set_move_pending(data); + irq_copy_pending(desc, dest); + return 0; +} +#else +static inline int irq_set_affinity_pending(struct irq_data *data, + const struct cpumask *dest) +{ + return -EBUSY; +} +#endif + +static int irq_try_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force) +{ + int ret = irq_do_set_affinity(data, dest, force); + + /* + * In case that the underlying vector management is busy and the + * architecture supports the generic pending mechanism then utilize + * this to avoid returning an error to user space. + */ + if (ret == -EBUSY && !force) + ret = irq_set_affinity_pending(data, dest); + return ret; +} + +static bool irq_set_affinity_deactivated(struct irq_data *data, + const struct cpumask *mask) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * + * If the interrupt is not yet activated, just store the affinity + * mask and do not call the chip driver at all. On activation the + * driver has to make sure anyway that the interrupt is in a + * usable state so startup works. + */ + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) + return false; + + cpumask_copy(desc->irq_common_data.affinity, mask); + irq_data_update_effective_affinity(data, mask); + irqd_set(data, IRQD_AFFINITY_SET); + return true; +} + +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_chip *chip = irq_data_get_irq_chip(data); + struct irq_desc *desc = irq_data_to_desc(data); + int ret = 0; + + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + + if (irq_set_affinity_deactivated(data, mask)) + return 0; + + if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { + ret = irq_try_set_affinity(data, mask, force); + } else { + irqd_set_move_pending(data); + irq_copy_pending(desc, mask); + } + + if (desc->affinity_notify) { + kref_get(&desc->affinity_notify->kref); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, + desc->affinity_notify->release); + } + } + irqd_set(data, IRQD_AFFINITY_SET); + + return ret; +} + +/** + * irq_update_affinity_desc - Update affinity management for an interrupt + * @irq: The interrupt number to update + * @affinity: Pointer to the affinity descriptor + * + * This interface can be used to configure the affinity management of + * interrupts which have been allocated already. + * + * There are certain limitations on when it may be used - attempts to use it + * for when the kernel is configured for generic IRQ reservation mode (in + * config GENERIC_IRQ_RESERVATION_MODE) will fail, as it may conflict with + * managed/non-managed interrupt accounting. In addition, attempts to use it on + * an interrupt which is already started or which has already been configured + * as managed will also fail, as these mean invalid init state or double init. + */ +int irq_update_affinity_desc(unsigned int irq, + struct irq_affinity_desc *affinity) +{ + struct irq_desc *desc; + unsigned long flags; + bool activated; + int ret = 0; + + /* + * Supporting this with the reservation scheme used by x86 needs + * some more thought. Fail it for now. + */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + return -EOPNOTSUPP; + + desc = irq_get_desc_buslock(irq, &flags, 0); + if (!desc) + return -EINVAL; + + /* Requires the interrupt to be shut down */ + if (irqd_is_started(&desc->irq_data)) { + ret = -EBUSY; + goto out_unlock; + } + + /* Interrupts which are already managed cannot be modified */ + if (irqd_affinity_is_managed(&desc->irq_data)) { + ret = -EBUSY; + goto out_unlock; + } + + /* + * Deactivate the interrupt. That's required to undo + * anything an earlier activation has established. + */ + activated = irqd_is_activated(&desc->irq_data); + if (activated) + irq_domain_deactivate_irq(&desc->irq_data); + + if (affinity->is_managed) { + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); + } + + cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); + + /* Restore the activation state */ + if (activated) + irq_domain_activate_irq(&desc->irq_data, false); + +out_unlock: + irq_put_desc_busunlock(desc, flags); + return ret; +} + +static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, + bool force) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + int ret; + + if (!desc) + return -EINVAL; + + raw_spin_lock_irqsave(&desc->lock, flags); + ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); + raw_spin_unlock_irqrestore(&desc->lock, flags); + return ret; +} + +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, false); +} +EXPORT_SYMBOL_GPL(irq_set_affinity); + +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. + */ +int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} +EXPORT_SYMBOL_GPL(irq_force_affinity); + +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc) + return -EINVAL; + desc->affinity_hint = m; + irq_put_desc_unlock(desc, flags); + if (m && setaffinity) + __irq_set_affinity(irq, m, false); + return 0; +} +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); + +static void irq_affinity_notify(struct work_struct *work) +{ + struct irq_affinity_notify *notify = + container_of(work, struct irq_affinity_notify, work); + struct irq_desc *desc = irq_to_desc(notify->irq); + cpumask_var_t cpumask; + unsigned long flags; + + if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) + goto out; + + raw_spin_lock_irqsave(&desc->lock, flags); + if (irq_move_pending(&desc->irq_data)) + irq_get_pending(cpumask, desc); + else + cpumask_copy(cpumask, desc->irq_common_data.affinity); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + notify->notify(notify, cpumask); + + free_cpumask_var(cpumask); +out: + kref_put(¬ify->kref, notify->release); +} + +/** + * irq_set_affinity_notifier - control notification of IRQ affinity changes + * @irq: Interrupt for which to enable/disable notification + * @notify: Context for notification, or %NULL to disable + * notification. Function pointers must be initialised; + * the other fields will be initialised by this function. + * + * Must be called in process context. Notification may only be enabled + * after the IRQ is allocated and must be disabled before the IRQ is + * freed using free_irq(). + */ +int +irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_affinity_notify *old_notify; + unsigned long flags; + + /* The release function is promised process context */ + might_sleep(); + + if (!desc || desc->istate & IRQS_NMI) + return -EINVAL; + + /* Complete initialisation of *notify */ + if (notify) { + notify->irq = irq; + kref_init(¬ify->kref); + INIT_WORK(¬ify->work, irq_affinity_notify); + } + + raw_spin_lock_irqsave(&desc->lock, flags); + old_notify = desc->affinity_notify; + desc->affinity_notify = notify; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + if (old_notify) { + if (cancel_work_sync(&old_notify->work)) { + /* Pending work had a ref, put that one too */ + kref_put(&old_notify->kref, old_notify->release); + } + kref_put(&old_notify->kref, old_notify->release); + } + + return 0; +} +EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); + +#ifndef CONFIG_AUTO_IRQ_AFFINITY +/* + * Generic version of the affinity autoselector. + */ +int irq_setup_affinity(struct irq_desc *desc) +{ + struct cpumask *set = irq_default_affinity; + int ret, node = irq_desc_get_node(desc); + static DEFINE_RAW_SPINLOCK(mask_lock); + static struct cpumask mask; + + /* Excludes PER_CPU and NO_BALANCE interrupts */ + if (!__irq_can_set_affinity(desc)) + return 0; + + raw_spin_lock(&mask_lock); + /* + * Preserve the managed affinity setting and a userspace affinity + * setup, but make sure that one of the targets is online. + */ + if (irqd_affinity_is_managed(&desc->irq_data) || + irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { + if (cpumask_intersects(desc->irq_common_data.affinity, + cpu_online_mask)) + set = desc->irq_common_data.affinity; + else + irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); + } + + cpumask_and(&mask, cpu_online_mask, set); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpu_online_mask); + + if (node != NUMA_NO_NODE) { + const struct cpumask *nodemask = cpumask_of_node(node); + + /* make sure at least one of the cpus in nodemask is online */ + if (cpumask_intersects(&mask, nodemask)) + cpumask_and(&mask, &mask, nodemask); + } + ret = irq_do_set_affinity(&desc->irq_data, &mask, false); + raw_spin_unlock(&mask_lock); + return ret; +} +#else +/* Wrapper for ALPHA specific affinity selector magic */ +int irq_setup_affinity(struct irq_desc *desc) +{ + return irq_select_affinity(irq_desc_get_irq(desc)); +} +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ + + +/** + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt + * @irq: interrupt number to set affinity + * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU + * specific data for percpu_devid interrupts + * + * This function uses the vCPU specific data to set the vCPU + * affinity for an irq. The vCPU specific data is passed from + * outside, such as KVM. One example code path is as below: + * KVM -> IOMMU -> irq_set_vcpu_affinity(). + */ +int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + struct irq_data *data; + struct irq_chip *chip; + int ret = -ENOSYS; + + if (!desc) + return -EINVAL; + + data = irq_desc_get_irq_data(desc); + do { + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) + ret = chip->irq_set_vcpu_affinity(data, vcpu_info); + irq_put_desc_unlock(desc, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); + +void __disable_irq(struct irq_desc *desc) +{ + if (!desc->depth++) + irq_disable(desc); +} + +static int __disable_irq_nosync(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc) + return -EINVAL; + __disable_irq(desc); + irq_put_desc_busunlock(desc, flags); + return 0; +} + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ +void disable_irq_nosync(unsigned int irq) +{ + __disable_irq_nosync(irq); +} +EXPORT_SYMBOL(disable_irq_nosync); + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + */ +void disable_irq(unsigned int irq) +{ + might_sleep(); + if (!__disable_irq_nosync(irq)) + synchronize_irq(irq); +} +EXPORT_SYMBOL(disable_irq); + +/** + * disable_hardirq - disables an irq and waits for hardirq completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending hard IRQ handlers for this + * interrupt to complete before returning. If you use this function while + * holding a resource the hard IRQ handler may need you will deadlock. + * + * When used to optimistically disable an interrupt from atomic context + * the return value must be checked. + * + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. + */ +bool disable_hardirq(unsigned int irq) +{ + if (!__disable_irq_nosync(irq)) + return synchronize_hardirq(irq); + + return false; +} +EXPORT_SYMBOL_GPL(disable_hardirq); + +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are + * nested. + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} + +void __enable_irq(struct irq_desc *desc) +{ + switch (desc->depth) { + case 0: + err_out: + WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", + irq_desc_get_irq(desc)); + break; + case 1: { + if (desc->istate & IRQS_SUSPENDED) + goto err_out; + /* Prevent probing on this irq: */ + irq_settings_set_noprobe(desc); + /* + * Call irq_startup() not irq_enable() here because the + * interrupt might be marked NOAUTOEN. So irq_startup() + * needs to be invoked when it gets enabled the first + * time. If it was already started up, then irq_startup() + * will invoke irq_enable() under the hood. + */ + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); + break; + } + default: + desc->depth--; + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context only when + * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! + */ +void enable_irq(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc) + return; + if (WARN(!desc->irq_data.chip, + KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) + goto out; + + __enable_irq(desc); +out: + irq_put_desc_busunlock(desc, flags); +} +EXPORT_SYMBOL(enable_irq); + +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + +static int set_irq_wake_real(unsigned int irq, unsigned int on) +{ + struct irq_desc *desc = irq_to_desc(irq); + int ret = -ENXIO; + + if (irq_desc_get_chip(desc)->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + + if (desc->irq_data.chip->irq_set_wake) + ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); + + return ret; +} + +/** + * irq_set_irq_wake - control irq power management wakeup + * @irq: interrupt to control + * @on: enable/disable power management wakeup + * + * Enable/disable power management wakeup mode, which is + * disabled by default. Enables and disables must match, + * just as they match for non-wakeup mode support. + * + * Wakeup mode lets this IRQ wake the system from sleep + * states like "suspend to RAM". + * + * Note: irq enable/disable state is completely orthogonal + * to the enable/disable state of irq wake. An irq can be + * disabled with disable_irq() and still wake the system as + * long as the irq has wake enabled. If this does not hold, + * then the underlying irq chip and the related driver need + * to be investigated. + */ +int irq_set_irq_wake(unsigned int irq, unsigned int on) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + int ret = 0; + + if (!desc) + return -EINVAL; + + /* Don't use NMIs as wake up interrupts please */ + if (desc->istate & IRQS_NMI) { + ret = -EINVAL; + goto out_unlock; + } + + /* wakeup-capable irqs can be shared between drivers that + * don't need to have the same sleep mode behaviors. + */ + if (on) { + if (desc->wake_depth++ == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 0; + else + irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); + } + } else { + if (desc->wake_depth == 0) { + WARN(1, "Unbalanced IRQ %d wake disable\n", irq); + } else if (--desc->wake_depth == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 1; + else + irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + } + } + +out_unlock: + irq_put_desc_busunlock(desc, flags); + return ret; +} +EXPORT_SYMBOL(irq_set_irq_wake); + +/* + * Internal function that tells the architecture code whether a + * particular irq has been exclusively allocated or is available + * for driver use. + */ +int can_request_irq(unsigned int irq, unsigned long irqflags) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + int canrequest = 0; + + if (!desc) + return 0; + + if (irq_settings_can_request(desc)) { + if (!desc->action || + irqflags & desc->action->flags & IRQF_SHARED) + canrequest = 1; + } + irq_put_desc_unlock(desc, flags); + return canrequest; +} + +int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) +{ + struct irq_chip *chip = desc->irq_data.chip; + int ret, unmask = 0; + + if (!chip || !chip->irq_set_type) { + /* + * IRQF_TRIGGER_* but the PIC does not support multiple + * flow-types? + */ + pr_debug("No set_type function for IRQ %d (%s)\n", + irq_desc_get_irq(desc), + chip ? (chip->name ? : "unknown") : "unknown"); + return 0; + } + + if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { + if (!irqd_irq_masked(&desc->irq_data)) + mask_irq(desc); + if (!irqd_irq_disabled(&desc->irq_data)) + unmask = 1; + } + + /* Mask all flags except trigger mode */ + flags &= IRQ_TYPE_SENSE_MASK; + ret = chip->irq_set_type(&desc->irq_data, flags); + + switch (ret) { + case IRQ_SET_MASK_OK: + case IRQ_SET_MASK_OK_DONE: + irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); + irqd_set(&desc->irq_data, flags); + fallthrough; + + case IRQ_SET_MASK_OK_NOCOPY: + flags = irqd_get_trigger_type(&desc->irq_data); + irq_settings_set_trigger_mask(desc, flags); + irqd_clear(&desc->irq_data, IRQD_LEVEL); + irq_settings_clr_level(desc); + if (flags & IRQ_TYPE_LEVEL_MASK) { + irq_settings_set_level(desc); + irqd_set(&desc->irq_data, IRQD_LEVEL); + } + + ret = 0; + break; + default: + pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n", + flags, irq_desc_get_irq(desc), chip->irq_set_type); + } + if (unmask) + unmask_irq(desc); + return ret; +} + +#ifdef CONFIG_HARDIRQS_SW_RESEND +int irq_set_parent(int irq, int parent_irq) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + + if (!desc) + return -EINVAL; + + desc->parent_irq = parent_irq; + + irq_put_desc_unlock(desc, flags); + return 0; +} +EXPORT_SYMBOL_GPL(irq_set_parent); +#endif + +/* + * Default primary interrupt handler for threaded interrupts. Is + * assigned as primary handler when request_threaded_irq is called + * with handler == NULL. Useful for oneshot interrupts. + */ +static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + +/* + * Primary handler for nested threaded interrupts. Should never be + * called. + */ +static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) +{ + WARN(1, "Primary handler called for nested irq %d\n", irq); + return IRQ_NONE; +} + +static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) +{ + WARN(1, "Secondary action handler called for irq %d\n", irq); + return IRQ_NONE; +} + +static int irq_wait_for_interrupt(struct irqaction *action) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + if (kthread_should_stop()) { + /* may need to run one last time */ + if (test_and_clear_bit(IRQTF_RUNTHREAD, + &action->thread_flags)) { + __set_current_state(TASK_RUNNING); + return 0; + } + __set_current_state(TASK_RUNNING); + return -1; + } + + if (test_and_clear_bit(IRQTF_RUNTHREAD, + &action->thread_flags)) { + __set_current_state(TASK_RUNNING); + return 0; + } + schedule(); + } +} + +/* + * Oneshot interrupts keep the irq line masked until the threaded + * handler finished. unmask if the interrupt has not been disabled and + * is marked MASKED. + */ +static void irq_finalize_oneshot(struct irq_desc *desc, + struct irqaction *action) +{ + if (!(desc->istate & IRQS_ONESHOT) || + action->handler == irq_forced_secondary_handler) + return; +again: + chip_bus_lock(desc); + raw_spin_lock_irq(&desc->lock); + + /* + * Implausible though it may be we need to protect us against + * the following scenario: + * + * The thread is faster done than the hard interrupt handler + * on the other CPU. If we unmask the irq line then the + * interrupt can come in again and masks the line, leaves due + * to IRQS_INPROGRESS and the irq line is masked forever. + * + * This also serializes the state of shared oneshot handlers + * versus "desc->threads_oneshot |= action->thread_mask;" in + * irq_wake_thread(). See the comment there which explains the + * serialization. + */ + if (unlikely(irqd_irq_inprogress(&desc->irq_data))) { + raw_spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(desc); + cpu_relax(); + goto again; + } + + /* + * Now check again, whether the thread should run. Otherwise + * we would clear the threads_oneshot bit of this thread which + * was just set. + */ + if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + goto out_unlock; + + desc->threads_oneshot &= ~action->thread_mask; + + if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data)) + unmask_threaded_irq(desc); + +out_unlock: + raw_spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(desc); +} + +#ifdef CONFIG_SMP +/* + * Check whether we need to change the affinity of the interrupt thread. + */ +static void +irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +{ + cpumask_var_t mask; + bool valid = true; + + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + raw_spin_lock_irq(&desc->lock); + /* + * This code is triggered unconditionally. Check the affinity + * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. + */ + if (cpumask_available(desc->irq_common_data.affinity)) { + const struct cpumask *m; + + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); + } else { + valid = false; + } + raw_spin_unlock_irq(&desc->lock); + + if (valid) + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} +#else +static inline void +irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } +#endif + +/* + * Interrupts which are not explicitly requested as threaded + * interrupts rely on the implicit bh/preempt disable of the hard irq + * context. So we need to disable bh here to avoid deadlocks and other + * side effects. + */ +static irqreturn_t +irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +{ + irqreturn_t ret; + + local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); + ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + + irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); + local_bh_enable(); + return ret; +} + +/* + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. + */ +static irqreturn_t irq_thread_fn(struct irq_desc *desc, + struct irqaction *action) +{ + irqreturn_t ret; + + ret = action->thread_fn(action->irq, action->dev_id); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); + + irq_finalize_oneshot(desc, action); + return ret; +} + +void wake_threads_waitq(struct irq_desc *desc) +{ + if (atomic_dec_and_test(&desc->threads_active)) + wake_up(&desc->wait_for_threads); +} + +static void irq_thread_dtor(struct callback_head *unused) +{ + struct task_struct *tsk = current; + struct irq_desc *desc; + struct irqaction *action; + + if (WARN_ON_ONCE(!(current->flags & PF_EXITING))) + return; + + action = kthread_data(tsk); + + pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", + tsk->comm, tsk->pid, action->irq); + + + desc = irq_to_desc(action->irq); + /* + * If IRQTF_RUNTHREAD is set, we need to decrement + * desc->threads_active and wake possible waiters. + */ + if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + wake_threads_waitq(desc); + + /* Prevent a stale desc->threads_oneshot */ + irq_finalize_oneshot(desc, action); +} + +static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) +{ + struct irqaction *secondary = action->secondary; + + if (WARN_ON_ONCE(!secondary)) + return; + + raw_spin_lock_irq(&desc->lock); + __irq_wake_thread(desc, secondary); + raw_spin_unlock_irq(&desc->lock); +} + +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + +/* + * Interrupt handler thread + */ +static int irq_thread(void *data) +{ + struct callback_head on_exit_work; + struct irqaction *action = data; + struct irq_desc *desc = irq_to_desc(action->irq); + irqreturn_t (*handler_fn)(struct irq_desc *desc, + struct irqaction *action); + + irq_thread_set_ready(desc, action); + + sched_set_fifo(current); + + if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) + handler_fn = irq_forced_thread_fn; + else + handler_fn = irq_thread_fn; + + init_task_work(&on_exit_work, irq_thread_dtor); + task_work_add(current, &on_exit_work, TWA_NONE); + + irq_thread_check_affinity(desc, action); + + while (!irq_wait_for_interrupt(action)) { + irqreturn_t action_ret; + + irq_thread_check_affinity(desc, action); + + action_ret = handler_fn(desc, action); + if (action_ret == IRQ_WAKE_THREAD) + irq_wake_secondary(desc, action); + + wake_threads_waitq(desc); + } + + /* + * This is the regular exit path. __free_irq() is stopping the + * thread via kthread_stop() after calling + * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the + * oneshot mask bit can be set. + */ + task_work_cancel(current, irq_thread_dtor); + return 0; +} + +/** + * irq_wake_thread - wake the irq thread for the action identified by dev_id + * @irq: Interrupt line + * @dev_id: Device identity for which the thread should be woken + * + */ +void irq_wake_thread(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + unsigned long flags; + + if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return; + + raw_spin_lock_irqsave(&desc->lock, flags); + for_each_action_of_desc(desc, action) { + if (action->dev_id == dev_id) { + if (action->thread) + __irq_wake_thread(desc, action); + break; + } + } + raw_spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL_GPL(irq_wake_thread); + +static int irq_setup_forced_threading(struct irqaction *new) +{ + if (!force_irqthreads()) + return 0; + if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) + return 0; + + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; + + new->flags |= IRQF_ONESHOT; + + /* + * Handle the case where we have a real primary handler and a + * thread handler. We force thread them as well by creating a + * secondary action. + */ + if (new->handler && new->thread_fn) { + /* Allocate the secondary action */ + new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!new->secondary) + return -ENOMEM; + new->secondary->handler = irq_forced_secondary_handler; + new->secondary->thread_fn = new->thread_fn; + new->secondary->dev_id = new->dev_id; + new->secondary->irq = new->irq; + new->secondary->name = new->name; + } + /* Deal with the primary handler */ + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); + new->thread_fn = new->handler; + new->handler = irq_default_primary_handler; + return 0; +} + +static int irq_request_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + return c->irq_request_resources ? c->irq_request_resources(d) : 0; +} + +static void irq_release_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + if (c->irq_release_resources) + c->irq_release_resources(d); +} + +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + +static int +setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) +{ + struct task_struct *t; + + if (!secondary) { + t = kthread_create(irq_thread, new, "irq/%d-%s", irq, + new->name); + } else { + t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, + new->name); + } + + if (IS_ERR(t)) + return PTR_ERR(t); + + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already freed task_struct. + */ + new->thread = get_task_struct(t); + /* + * Tell the thread to set its affinity. This is + * important for shared interrupt handlers as we do + * not invoke setup_affinity() for the secondary + * handlers as everything is already set up. Even for + * interrupts marked with IRQF_NO_BALANCE this is + * correct as we want the thread to move to the cpu(s) + * on which the requesting code placed the interrupt. + */ + set_bit(IRQTF_AFFINITY, &new->thread_flags); + return 0; +} + +/* + * Internal function to register an irqaction - typically used to + * allocate special interrupts that are part of the architecture. + * + * Locking rules: + * + * desc->request_mutex Provides serialization against a concurrent free_irq() + * chip_bus_lock Provides serialization for slow bus operations + * desc->lock Provides serialization against hard interrupts + * + * chip_bus_lock and desc->lock are sufficient for all other management and + * interrupt related functions. desc->request_mutex solely serializes + * request/free_irq(). + */ +static int +__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) +{ + struct irqaction *old, **old_ptr; + unsigned long flags, thread_mask = 0; + int ret, nested, shared = 0; + + if (!desc) + return -EINVAL; + + if (desc->irq_data.chip == &no_irq_chip) + return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; + + new->irq = irq; + + /* + * If the trigger type is not specified by the caller, + * then use the default for this interrupt. + */ + if (!(new->flags & IRQF_TRIGGER_MASK)) + new->flags |= irqd_get_trigger_type(&desc->irq_data); + + /* + * Check whether the interrupt nests into another interrupt + * thread. + */ + nested = irq_settings_is_nested_thread(desc); + if (nested) { + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } + /* + * Replace the primary handler which was provided from + * the driver for non nested interrupt handling by the + * dummy function which warns when called. + */ + new->handler = irq_nested_primary_handler; + } else { + if (irq_settings_can_thread(desc)) { + ret = irq_setup_forced_threading(new); + if (ret) + goto out_mput; + } + } + + /* + * Create a handler thread when a thread function is supplied + * and the interrupt does not nest into another interrupt + * thread. + */ + if (new->thread_fn && !nested) { + ret = setup_irq_thread(new, irq, false); + if (ret) + goto out_mput; + if (new->secondary) { + ret = setup_irq_thread(new->secondary, irq, true); + if (ret) + goto out_thread; + } + } + + /* + * Drivers are often written to work w/o knowledge about the + * underlying irq chip implementation, so a request for a + * threaded irq without a primary hard irq context handler + * requires the ONESHOT flag to be set. Some irq chips like + * MSI based interrupts are per se one shot safe. Check the + * chip flags, so we can avoid the unmask dance at the end of + * the threaded handler for those. + */ + if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE) + new->flags &= ~IRQF_ONESHOT; + + /* + * Protects against a concurrent __free_irq() call which might wait + * for synchronize_hardirq() to complete without holding the optional + * chip bus lock and desc->lock. Also protects against handing out + * a recycled oneshot thread_mask bit while it's still in use by + * its previous owner. + */ + mutex_lock(&desc->request_mutex); + + /* + * Acquire bus lock as the irq_request_resources() callback below + * might rely on the serialization or the magic power management + * functions which are abusing the irq_bus_lock() callback, + */ + chip_bus_lock(desc); + + /* First installed action requests resources. */ + if (!desc->action) { + ret = irq_request_resources(desc); + if (ret) { + pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", + new->name, irq, desc->irq_data.chip->name); + goto out_bus_unlock; + } + } + + /* + * The following block of code has to be executed atomically + * protected against a concurrent interrupt and any of the other + * management calls which are not serialized via + * desc->request_mutex or the optional bus lock. + */ + raw_spin_lock_irqsave(&desc->lock, flags); + old_ptr = &desc->action; + old = *old_ptr; + if (old) { + /* + * Can't share interrupts unless both agree to and are + * the same type (level, edge, polarity). So both flag + * fields must have IRQF_SHARED set and the bits which + * set the trigger type must match. Also all must + * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. + */ + unsigned int oldtype; + + if (desc->istate & IRQS_NMI) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + + /* + * If nobody did set the configuration before, inherit + * the one provided by the requester. + */ + if (irqd_trigger_type_was_set(&desc->irq_data)) { + oldtype = irqd_get_trigger_type(&desc->irq_data); + } else { + oldtype = new->flags & IRQF_TRIGGER_MASK; + irqd_set_trigger_type(&desc->irq_data, oldtype); + } + + if (!((old->flags & new->flags) & IRQF_SHARED) || + (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || + ((old->flags ^ new->flags) & IRQF_ONESHOT)) + goto mismatch; + + /* All handlers must agree on per-cpuness */ + if ((old->flags & IRQF_PERCPU) != + (new->flags & IRQF_PERCPU)) + goto mismatch; + + /* add new interrupt at end of irq queue */ + do { + /* + * Or all existing action->thread_mask bits, + * so we can find the next zero bit for this + * new action. + */ + thread_mask |= old->thread_mask; + old_ptr = &old->next; + old = *old_ptr; + } while (old); + shared = 1; + } + + /* + * Setup the thread mask for this irqaction for ONESHOT. For + * !ONESHOT irqs the thread mask is 0 so we can avoid a + * conditional in irq_wake_thread(). + */ + if (new->flags & IRQF_ONESHOT) { + /* + * Unlikely to have 32 resp 64 irqs sharing one line, + * but who knows. + */ + if (thread_mask == ~0UL) { + ret = -EBUSY; + goto out_unlock; + } + /* + * The thread_mask for the action is or'ed to + * desc->thread_active to indicate that the + * IRQF_ONESHOT thread handler has been woken, but not + * yet finished. The bit is cleared when a thread + * completes. When all threads of a shared interrupt + * line have completed desc->threads_active becomes + * zero and the interrupt line is unmasked. See + * handle.c:irq_wake_thread() for further information. + * + * If no thread is woken by primary (hard irq context) + * interrupt handlers, then desc->threads_active is + * also checked for zero to unmask the irq line in the + * affected hard irq flow handlers + * (handle_[fasteoi|level]_irq). + * + * The new action gets the first zero bit of + * thread_mask assigned. See the loop above which or's + * all existing action->thread_mask bits. + */ + new->thread_mask = 1UL << ffz(thread_mask); + + } else if (new->handler == irq_default_primary_handler && + !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) { + /* + * The interrupt was requested with handler = NULL, so + * we use the default primary handler for it. But it + * does not have the oneshot flag set. In combination + * with level interrupts this is deadly, because the + * default primary handler just wakes the thread, then + * the irq lines is reenabled, but the device still + * has the level irq asserted. Rinse and repeat.... + * + * While this works for edge type interrupts, we play + * it safe and reject unconditionally because we can't + * say for sure which type this interrupt really + * has. The type flags are unreliable as the + * underlying chip implementation can override them. + */ + pr_err("Threaded irq requested with handler=NULL and !ONESHOT for %s (irq %d)\n", + new->name, irq); + ret = -EINVAL; + goto out_unlock; + } + + if (!shared) { + /* Setup the type (level, edge polarity) if configured: */ + if (new->flags & IRQF_TRIGGER_MASK) { + ret = __irq_set_trigger(desc, + new->flags & IRQF_TRIGGER_MASK); + + if (ret) + goto out_unlock; + } + + /* + * Activate the interrupt. That activation must happen + * independently of IRQ_NOAUTOEN. request_irq() can fail + * and the callers are supposed to handle + * that. enable_irq() of an interrupt requested with + * IRQ_NOAUTOEN is not supposed to fail. The activation + * keeps it in shutdown mode, it merily associates + * resources if necessary and if that's not possible it + * fails. Interrupts which are in managed shutdown mode + * will simply ignore that activation request. + */ + ret = irq_activate(desc); + if (ret) + goto out_unlock; + + desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ + IRQS_ONESHOT | IRQS_WAITING); + irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); + + if (new->flags & IRQF_PERCPU) { + irqd_set(&desc->irq_data, IRQD_PER_CPU); + irq_settings_set_per_cpu(desc); + if (new->flags & IRQF_NO_DEBUG) + irq_settings_set_no_debug(desc); + } + + if (noirqdebug) + irq_settings_set_no_debug(desc); + + if (new->flags & IRQF_ONESHOT) + desc->istate |= IRQS_ONESHOT; + + /* Exclude IRQ from balancing if requested */ + if (new->flags & IRQF_NOBALANCING) { + irq_settings_set_no_balancing(desc); + irqd_set(&desc->irq_data, IRQD_NO_BALANCING); + } + + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + } else { + /* + * Shared interrupts do not go well with disabling + * auto enable. The sharing interrupt might request + * it while it's still disabled and then wait for + * interrupts forever. + */ + WARN_ON_ONCE(new->flags & IRQF_SHARED); + /* Undo nested disables: */ + desc->depth = 1; + } + + } else if (new->flags & IRQF_TRIGGER_MASK) { + unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; + unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); + + if (nmsk != omsk) + /* hope the handler works with current trigger mode */ + pr_warn("irq %d uses trigger mode %u; requested %u\n", + irq, omsk, nmsk); + } + + *old_ptr = new; + + irq_pm_install_action(desc, new); + + /* Reset broken irq detection when installing new handler */ + desc->irq_count = 0; + desc->irqs_unhandled = 0; + + /* + * Check whether we disabled the irq via the spurious handler + * before. Reenable it and give it another chance. + */ + if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) { + desc->istate &= ~IRQS_SPURIOUS_DISABLED; + __enable_irq(desc); + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(desc); + mutex_unlock(&desc->request_mutex); + + irq_setup_timings(desc, new); + + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); + + register_irq_proc(irq, desc); + new->dir = NULL; + register_handler_proc(irq, new); + return 0; + +mismatch: + if (!(new->flags & IRQF_PROBE_SHARED)) { + pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n", + irq, new->flags, new->name, old->flags, old->name); +#ifdef CONFIG_DEBUG_SHIRQ + dump_stack(); +#endif + } + ret = -EBUSY; + +out_unlock: + raw_spin_unlock_irqrestore(&desc->lock, flags); + + if (!desc->action) + irq_release_resources(desc); +out_bus_unlock: + chip_bus_sync_unlock(desc); + mutex_unlock(&desc->request_mutex); + +out_thread: + if (new->thread) { + struct task_struct *t = new->thread; + + new->thread = NULL; + kthread_stop(t); + put_task_struct(t); + } + if (new->secondary && new->secondary->thread) { + struct task_struct *t = new->secondary->thread; + + new->secondary->thread = NULL; + kthread_stop(t); + put_task_struct(t); + } +out_mput: + module_put(desc->owner); + return ret; +} + +/* + * Internal function to unregister an irqaction - used to free + * regular and special interrupts that are part of the architecture. + */ +static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) +{ + unsigned irq = desc->irq_data.irq; + struct irqaction *action, **action_ptr; + unsigned long flags; + + WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); + + mutex_lock(&desc->request_mutex); + chip_bus_lock(desc); + raw_spin_lock_irqsave(&desc->lock, flags); + + /* + * There can be multiple actions per IRQ descriptor, find the right + * one based on the dev_id: + */ + action_ptr = &desc->action; + for (;;) { + action = *action_ptr; + + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + raw_spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(desc); + mutex_unlock(&desc->request_mutex); + return NULL; + } + + if (action->dev_id == dev_id) + break; + action_ptr = &action->next; + } + + /* Found it - now remove it from the list of entries: */ + *action_ptr = action->next; + + irq_pm_remove_action(desc, action); + + /* If this was the last handler, shut down the IRQ line: */ + if (!desc->action) { + irq_settings_clr_disable_unlazy(desc); + /* Only shutdown. Deactivate after synchronize_hardirq() */ + irq_shutdown(desc); + } + +#ifdef CONFIG_SMP + /* make sure affinity_hint is cleaned up */ + if (WARN_ON_ONCE(desc->affinity_hint)) + desc->affinity_hint = NULL; +#endif + + raw_spin_unlock_irqrestore(&desc->lock, flags); + /* + * Drop bus_lock here so the changes which were done in the chip + * callbacks above are synced out to the irq chips which hang + * behind a slow bus (I2C, SPI) before calling synchronize_hardirq(). + * + * Aside of that the bus_lock can also be taken from the threaded + * handler in irq_finalize_oneshot() which results in a deadlock + * because kthread_stop() would wait forever for the thread to + * complete, which is blocked on the bus lock. + * + * The still held desc->request_mutex() protects against a + * concurrent request_irq() of this irq so the release of resources + * and timing data is properly serialized. + */ + chip_bus_sync_unlock(desc); + + unregister_handler_proc(irq, action); + + /* + * Make sure it's not being used on another CPU and if the chip + * supports it also make sure that there is no (not yet serviced) + * interrupt in flight at the hardware level. + */ + __synchronize_irq(desc); + +#ifdef CONFIG_DEBUG_SHIRQ + /* + * It's a shared IRQ -- the driver ought to be prepared for an IRQ + * event to happen even now it's being freed, so let's make sure that + * is so by doing an extra call to the handler .... + * + * ( We do this after actually deregistering it, to make sure that a + * 'real' IRQ doesn't run in parallel with our fake. ) + */ + if (action->flags & IRQF_SHARED) { + local_irq_save(flags); + action->handler(irq, dev_id); + local_irq_restore(flags); + } +#endif + + /* + * The action has already been removed above, but the thread writes + * its oneshot mask bit when it completes. Though request_mutex is + * held across this which prevents __setup_irq() from handing out + * the same bit to a newly requested action. + */ + if (action->thread) { + kthread_stop(action->thread); + put_task_struct(action->thread); + if (action->secondary && action->secondary->thread) { + kthread_stop(action->secondary->thread); + put_task_struct(action->secondary->thread); + } + } + + /* Last action releases resources */ + if (!desc->action) { + /* + * Reacquire bus lock as irq_release_resources() might + * require it to deallocate resources over the slow bus. + */ + chip_bus_lock(desc); + /* + * There is no interrupt on the fly anymore. Deactivate it + * completely. + */ + raw_spin_lock_irqsave(&desc->lock, flags); + irq_domain_deactivate_irq(&desc->irq_data); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + irq_release_resources(desc); + chip_bus_sync_unlock(desc); + irq_remove_timings(desc); + } + + mutex_unlock(&desc->request_mutex); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + kfree(action->secondary); + return action; +} + +/** + * free_irq - free an interrupt allocated with request_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function must not be called from interrupt context. + * + * Returns the devname argument passed to request_irq. + */ +const void *free_irq(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + const char *devname; + + if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + +#ifdef CONFIG_SMP + if (WARN_ON(desc->affinity_notify)) + desc->affinity_notify = NULL; +#endif + + action = __free_irq(desc, dev_id); + + if (!action) + return NULL; + + devname = action->name; + kfree(action); + return devname; +} +EXPORT_SYMBOL(free_irq); + +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown_and_deactivate(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + const void *devname; + + if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + raw_spin_lock_irqsave(&desc->lock, flags); + + irq_nmi_teardown(desc); + devname = __cleanup_nmi(irq, desc); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return devname; +} + +/** + * request_threaded_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts. + * If handler is NULL and thread_fn != NULL + * the default primary handler is installed. + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * If you want to set up a threaded irq handler for your device + * then you need to supply @handler and @thread_fn. @handler is + * still called in hard interrupt context and has to check + * whether the interrupt originates from the device. If yes it + * needs to disable the interrupt on the device and return + * IRQ_WAKE_THREAD which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support + * shared interrupts. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * IRQF_SHARED Interrupt is shared + * IRQF_TRIGGER_* Specify active edge(s) or level + * IRQF_ONESHOT Run thread_fn with interrupt line masked + */ +int request_threaded_irq(unsigned int irq, irq_handler_t handler, + irq_handler_t thread_fn, unsigned long irqflags, + const char *devname, void *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + int retval; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + /* + * Sanity-check: shared interrupts must pass in a real dev-ID, + * otherwise we'll have trouble later trying to figure out + * which interrupt is which (messes up the interrupt freeing + * logic etc). + * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * + * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and + * it cannot be set along with IRQF_NO_SUSPEND. + */ + if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || + (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || + ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) + return -EINVAL; + + desc = irq_to_desc(irq); + if (!desc) + return -EINVAL; + + if (!irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return -EINVAL; + + if (!handler) { + if (!thread_fn) + return -EINVAL; + handler = irq_default_primary_handler; + } + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->thread_fn = thread_fn; + action->flags = irqflags; + action->name = devname; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) { + kfree(action); + return retval; + } + + retval = __setup_irq(irq, desc, action); + + if (retval) { + irq_chip_pm_put(&desc->irq_data); + kfree(action->secondary); + kfree(action); + } + +#ifdef CONFIG_DEBUG_SHIRQ_FIXME + if (!retval && (irqflags & IRQF_SHARED)) { + /* + * It's a shared IRQ -- the driver ought to be prepared for it + * to happen immediately, so let's make sure.... + * We disable the irq to make sure that a 'real' IRQ doesn't + * run in parallel with our fake. + */ + unsigned long flags; + + disable_irq(irq); + local_irq_save(flags); + + handler(irq, dev_id); + + local_irq_restore(flags); + enable_irq(irq); + } +#endif + return retval; +} +EXPORT_SYMBOL(request_threaded_irq); + +/** + * request_any_context_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @flags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It selects either a + * hardirq or threaded handling method depending on the + * context. + * + * On failure, it returns a negative value. On success, + * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. + */ +int request_any_context_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *name, void *dev_id) +{ + struct irq_desc *desc; + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + desc = irq_to_desc(irq); + if (!desc) + return -EINVAL; + + if (irq_settings_is_nested_thread(desc)) { + ret = request_threaded_irq(irq, NULL, handler, + flags, name, dev_id); + return !ret ? IRQC_IS_NESTED : ret; + } + + ret = request_irq(irq, handler, flags, name, dev_id); + return !ret ? IRQC_IS_HARDIRQ : ret; +} +EXPORT_SYMBOL_GPL(request_any_context_irq); + +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It sets up the IRQ line + * to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail and return a negative value. + */ +int request_nmi(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *name, void *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + /* NMI cannot be shared, used for Polling */ + if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) + return -EINVAL; + + if (!(irqflags & IRQF_PERCPU)) + return -EINVAL; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || + !irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc)) || + !irq_supports_nmi(desc)) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; + action->name = name; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + return -EINVAL; + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +void enable_percpu_irq(unsigned int irq, unsigned int type) +{ + unsigned int cpu = smp_processor_id(); + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); + + if (!desc) + return; + + /* + * If the trigger type is not specified by the caller, then + * use the default for this interrupt. + */ + type &= IRQ_TYPE_SENSE_MASK; + if (type == IRQ_TYPE_NONE) + type = irqd_get_trigger_type(&desc->irq_data); + + if (type != IRQ_TYPE_NONE) { + int ret; + + ret = __irq_set_trigger(desc, type); + + if (ret) { + WARN(1, "failed to set type for IRQ%d\n", irq); + goto out; + } + } + + irq_percpu_enable(desc, cpu); +out: + irq_put_desc_unlock(desc, flags); +} +EXPORT_SYMBOL_GPL(enable_percpu_irq); + +void enable_percpu_nmi(unsigned int irq, unsigned int type) +{ + enable_percpu_irq(irq, type); +} + +/** + * irq_percpu_is_enabled - Check whether the per cpu irq is enabled + * @irq: Linux irq number to check for + * + * Must be called from a non migratable context. Returns the enable + * state of a per cpu interrupt on the current cpu. + */ +bool irq_percpu_is_enabled(unsigned int irq) +{ + unsigned int cpu = smp_processor_id(); + struct irq_desc *desc; + unsigned long flags; + bool is_enabled; + + desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return false; + + is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); + irq_put_desc_unlock(desc, flags); + + return is_enabled; +} +EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); + +void disable_percpu_irq(unsigned int irq) +{ + unsigned int cpu = smp_processor_id(); + unsigned long flags; + struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); + + if (!desc) + return; + + irq_percpu_disable(desc, cpu); + irq_put_desc_unlock(desc, flags); +} +EXPORT_SYMBOL_GPL(disable_percpu_irq); + +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + +/* + * Internal function to unregister a percpu irqaction. + */ +static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + unsigned long flags; + + WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); + + if (!desc) + return NULL; + + raw_spin_lock_irqsave(&desc->lock, flags); + + action = desc->action; + if (!action || action->percpu_dev_id != dev_id) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + goto bad; + } + + if (!cpumask_empty(desc->percpu_enabled)) { + WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", + irq, cpumask_first(desc->percpu_enabled)); + goto bad; + } + + /* Found it - now remove it from the list of entries: */ + desc->action = NULL; + + desc->istate &= ~IRQS_NMI; + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + unregister_handler_proc(irq, action); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + return action; + +bad: + raw_spin_unlock_irqrestore(&desc->lock, flags); + return NULL; +} + +/** + * remove_percpu_irq - free a per-cpu interrupt + * @irq: Interrupt line to free + * @act: irqaction for the interrupt + * + * Used to remove interrupts statically setup by the early boot process. + */ +void remove_percpu_irq(unsigned int irq, struct irqaction *act) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc && irq_settings_is_per_cpu_devid(desc)) + __free_percpu_irq(irq, act->percpu_dev_id); +} + +/** + * free_percpu_irq - free an interrupt allocated with request_percpu_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove a percpu interrupt handler. The handler is removed, but + * the interrupt line is not disabled. This must be done on each + * CPU before calling this function. The function does not return + * until any executing interrupts for this IRQ have completed. + * + * This function must not be called from interrupt context. + */ +void free_percpu_irq(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + chip_bus_lock(desc); + kfree(__free_percpu_irq(irq, dev_id)); + chip_bus_sync_unlock(desc); +} +EXPORT_SYMBOL_GPL(free_percpu_irq); + +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + return; + + kfree(__free_percpu_irq(irq, dev_id)); +} + +/** + * setup_percpu_irq - setup a per-cpu interrupt + * @irq: Interrupt line to setup + * @act: irqaction for the interrupt + * + * Used to statically setup per-cpu interrupts in the early boot process. + */ +int setup_percpu_irq(unsigned int irq, struct irqaction *act) +{ + struct irq_desc *desc = irq_to_desc(irq); + int retval; + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return -EINVAL; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + return retval; + + retval = __setup_irq(irq, desc, act); + + if (retval) + irq_chip_pm_put(&desc->irq_data); + + return retval; +} + +/** + * __request_percpu_irq - allocate a percpu interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @flags: Interrupt type flags (IRQF_TIMER only) + * @devname: An ascii name for the claiming device + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt on the local CPU. If the interrupt is supposed to be + * enabled on other CPUs, it has to be done on each CPU using + * enable_percpu_irq(). + * + * Dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. + */ +int __request_percpu_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *devname, + void __percpu *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + int retval; + + if (!dev_id) + return -EINVAL; + + desc = irq_to_desc(irq); + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc)) + return -EINVAL; + + if (flags && flags != IRQF_TIMER) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; + action->name = devname; + action->percpu_dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) { + kfree(action); + return retval; + } + + retval = __setup_irq(irq, desc, action); + + if (retval) { + irq_chip_pm_put(&desc->irq_data); + kfree(action); + } + + return retval; +} +EXPORT_SYMBOL_GPL(__request_percpu_irq); + +/** + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling prepare_percpu_nmi() before + * being enabled on the same CPU by using enable_percpu_nmi(). + * + * Dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *name, void __percpu *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc) || + irq_settings_can_autoenable(desc) || + !irq_supports_nmi(desc)) + return -EINVAL; + + /* The line cannot already be NMI */ + if (desc->istate & IRQS_NMI) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD + | IRQF_NOBALANCING; + action->name = name; + action->percpu_dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc->istate |= IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +/** + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery + * + * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). + * + * As a CPU local operation, this should be called from non-preemptible + * context. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int prepare_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + int ret = 0; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return -EINVAL; + + if (WARN(!(desc->istate & IRQS_NMI), + KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", + irq)) { + ret = -EINVAL; + goto out; + } + + ret = irq_nmi_setup(desc); + if (ret) { + pr_err("Failed to setup NMI delivery: irq %u\n", irq); + goto out; + } + +out: + irq_put_desc_unlock(desc, flags); + return ret; +} + +/** + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be + * removed + * + * This call undoes the setup done by prepare_percpu_nmi(). + * + * IRQ line should not be enabled for the current CPU. + * + * As a CPU local operation, this should be called from non-preemptible + * context. + */ +void teardown_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + goto out; + + irq_nmi_teardown(desc); +out: + irq_put_desc_unlock(desc, flags); +} + +int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, + bool *state) +{ + struct irq_chip *chip; + int err = -EINVAL; + + do { + chip = irq_data_get_irq_chip(data); + if (WARN_ON_ONCE(!chip)) + return -ENODEV; + if (chip->irq_get_irqchip_state) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) + err = chip->irq_get_irqchip_state(data, which, state); + return err; +} + +/** + * irq_get_irqchip_state - returns the irqchip state of a interrupt. + * @irq: Interrupt line that is forwarded to a VM + * @which: One of IRQCHIP_STATE_* the caller wants to know about + * @state: a pointer to a boolean where the state is to be stored + * + * This call snapshots the internal irqchip state of an + * interrupt, returning into @state the bit corresponding to + * stage @which + * + * This function should be called with preemption disabled if the + * interrupt controller has per-cpu registers. + */ +int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, + bool *state) +{ + struct irq_desc *desc; + struct irq_data *data; + unsigned long flags; + int err = -EINVAL; + + desc = irq_get_desc_buslock(irq, &flags, 0); + if (!desc) + return err; + + data = irq_desc_get_irq_data(desc); + + err = __irq_get_irqchip_state(data, which, state); + + irq_put_desc_busunlock(desc, flags); + return err; +} +EXPORT_SYMBOL_GPL(irq_get_irqchip_state); + +/** + * irq_set_irqchip_state - set the state of a forwarded interrupt. + * @irq: Interrupt line that is forwarded to a VM + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which + * + * This call sets the internal irqchip state of an interrupt, + * depending on the value of @which. + * + * This function should be called with migration disabled if the + * interrupt controller has per-cpu registers. + */ +int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, + bool val) +{ + struct irq_desc *desc; + struct irq_data *data; + struct irq_chip *chip; + unsigned long flags; + int err = -EINVAL; + + desc = irq_get_desc_buslock(irq, &flags, 0); + if (!desc) + return err; + + data = irq_desc_get_irq_data(desc); + + do { + chip = irq_data_get_irq_chip(data); + if (WARN_ON_ONCE(!chip)) { + err = -ENODEV; + goto out_unlock; + } + if (chip->irq_set_irqchip_state) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) + err = chip->irq_set_irqchip_state(data, which, val); + +out_unlock: + irq_put_desc_busunlock(desc, flags); + return err; +} +EXPORT_SYMBOL_GPL(irq_set_irqchip_state); + +/** + * irq_has_action - Check whether an interrupt is requested + * @irq: The linux irq number + * + * Returns: A snapshot of the current state + */ +bool irq_has_action(unsigned int irq) +{ + bool res; + + rcu_read_lock(); + res = irq_desc_has_action(irq_to_desc(irq)); + rcu_read_unlock(); + return res; +} +EXPORT_SYMBOL_GPL(irq_has_action); + +/** + * irq_check_status_bit - Check whether bits in the irq descriptor status are set + * @irq: The linux irq number + * @bitmask: The bitmask to evaluate + * + * Returns: True if one of the bits in @bitmask is set + */ +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask) +{ + struct irq_desc *desc; + bool res = false; + + rcu_read_lock(); + desc = irq_to_desc(irq); + if (desc) + res = !!(desc->status_use_accessors & bitmask); + rcu_read_unlock(); + return res; +} +EXPORT_SYMBOL_GPL(irq_check_status_bit); diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c new file mode 100644 index 0000000000..75d0ae490e --- /dev/null +++ b/kernel/irq/matrix.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de> + +#include <linux/spinlock.h> +#include <linux/seq_file.h> +#include <linux/bitmap.h> +#include <linux/percpu.h> +#include <linux/cpu.h> +#include <linux/irq.h> + +#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS)) + +struct cpumap { + unsigned int available; + unsigned int allocated; + unsigned int managed; + unsigned int managed_allocated; + bool initialized; + bool online; + unsigned long alloc_map[IRQ_MATRIX_SIZE]; + unsigned long managed_map[IRQ_MATRIX_SIZE]; +}; + +struct irq_matrix { + unsigned int matrix_bits; + unsigned int alloc_start; + unsigned int alloc_end; + unsigned int alloc_size; + unsigned int global_available; + unsigned int global_reserved; + unsigned int systembits_inalloc; + unsigned int total_allocated; + unsigned int online_maps; + struct cpumap __percpu *maps; + unsigned long scratch_map[IRQ_MATRIX_SIZE]; + unsigned long system_map[IRQ_MATRIX_SIZE]; +}; + +#define CREATE_TRACE_POINTS +#include <trace/events/irq_matrix.h> + +/** + * irq_alloc_matrix - Allocate a irq_matrix structure and initialize it + * @matrix_bits: Number of matrix bits must be <= IRQ_MATRIX_BITS + * @alloc_start: From which bit the allocation search starts + * @alloc_end: At which bit the allocation search ends, i.e first + * invalid bit + */ +__init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, + unsigned int alloc_start, + unsigned int alloc_end) +{ + struct irq_matrix *m; + + if (matrix_bits > IRQ_MATRIX_BITS) + return NULL; + + m = kzalloc(sizeof(*m), GFP_KERNEL); + if (!m) + return NULL; + + m->matrix_bits = matrix_bits; + m->alloc_start = alloc_start; + m->alloc_end = alloc_end; + m->alloc_size = alloc_end - alloc_start; + m->maps = alloc_percpu(*m->maps); + if (!m->maps) { + kfree(m); + return NULL; + } + return m; +} + +/** + * irq_matrix_online - Bring the local CPU matrix online + * @m: Matrix pointer + */ +void irq_matrix_online(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + BUG_ON(cm->online); + + if (!cm->initialized) { + cm->available = m->alloc_size; + cm->available -= cm->managed + m->systembits_inalloc; + cm->initialized = true; + } + m->global_available += cm->available; + cm->online = true; + m->online_maps++; + trace_irq_matrix_online(m); +} + +/** + * irq_matrix_offline - Bring the local CPU matrix offline + * @m: Matrix pointer + */ +void irq_matrix_offline(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + /* Update the global available size */ + m->global_available -= cm->available; + cm->online = false; + m->online_maps--; + trace_irq_matrix_offline(m); +} + +static unsigned int matrix_alloc_area(struct irq_matrix *m, struct cpumap *cm, + unsigned int num, bool managed) +{ + unsigned int area, start = m->alloc_start; + unsigned int end = m->alloc_end; + + bitmap_or(m->scratch_map, cm->managed_map, m->system_map, end); + bitmap_or(m->scratch_map, m->scratch_map, cm->alloc_map, end); + area = bitmap_find_next_zero_area(m->scratch_map, end, start, num, 0); + if (area >= end) + return area; + if (managed) + bitmap_set(cm->managed_map, area, num); + else + bitmap_set(cm->alloc_map, area, num); + return area; +} + +/* Find the best CPU which has the lowest vector allocation count */ +static unsigned int matrix_find_best_cpu(struct irq_matrix *m, + const struct cpumask *msk) +{ + unsigned int cpu, best_cpu, maxavl = 0; + struct cpumap *cm; + + best_cpu = UINT_MAX; + + for_each_cpu(cpu, msk) { + cm = per_cpu_ptr(m->maps, cpu); + + if (!cm->online || cm->available <= maxavl) + continue; + + best_cpu = cpu; + maxavl = cm->available; + } + return best_cpu; +} + +/* Find the best CPU which has the lowest number of managed IRQs allocated */ +static unsigned int matrix_find_best_cpu_managed(struct irq_matrix *m, + const struct cpumask *msk) +{ + unsigned int cpu, best_cpu, allocated = UINT_MAX; + struct cpumap *cm; + + best_cpu = UINT_MAX; + + for_each_cpu(cpu, msk) { + cm = per_cpu_ptr(m->maps, cpu); + + if (!cm->online || cm->managed_allocated > allocated) + continue; + + best_cpu = cpu; + allocated = cm->managed_allocated; + } + return best_cpu; +} + +/** + * irq_matrix_assign_system - Assign system wide entry in the matrix + * @m: Matrix pointer + * @bit: Which bit to reserve + * @replace: Replace an already allocated vector with a system + * vector at the same bit position. + * + * The BUG_ON()s below are on purpose. If this goes wrong in the + * early boot process, then the chance to survive is about zero. + * If this happens when the system is life, it's not much better. + */ +void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, + bool replace) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + BUG_ON(bit > m->matrix_bits); + BUG_ON(m->online_maps > 1 || (m->online_maps && !replace)); + + set_bit(bit, m->system_map); + if (replace) { + BUG_ON(!test_and_clear_bit(bit, cm->alloc_map)); + cm->allocated--; + m->total_allocated--; + } + if (bit >= m->alloc_start && bit < m->alloc_end) + m->systembits_inalloc++; + + trace_irq_matrix_assign_system(bit, m); +} + +/** + * irq_matrix_reserve_managed - Reserve a managed interrupt in a CPU map + * @m: Matrix pointer + * @msk: On which CPUs the bits should be reserved. + * + * Can be called for offline CPUs. Note, this will only reserve one bit + * on all CPUs in @msk, but it's not guaranteed that the bits are at the + * same offset on all CPUs + */ +int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk) +{ + unsigned int cpu, failed_cpu; + + for_each_cpu(cpu, msk) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit; + + bit = matrix_alloc_area(m, cm, 1, true); + if (bit >= m->alloc_end) + goto cleanup; + cm->managed++; + if (cm->online) { + cm->available--; + m->global_available--; + } + trace_irq_matrix_reserve_managed(bit, cpu, m, cm); + } + return 0; +cleanup: + failed_cpu = cpu; + for_each_cpu(cpu, msk) { + if (cpu == failed_cpu) + break; + irq_matrix_remove_managed(m, cpumask_of(cpu)); + } + return -ENOSPC; +} + +/** + * irq_matrix_remove_managed - Remove managed interrupts in a CPU map + * @m: Matrix pointer + * @msk: On which CPUs the bits should be removed + * + * Can be called for offline CPUs + * + * This removes not allocated managed interrupts from the map. It does + * not matter which one because the managed interrupts free their + * allocation when they shut down. If not, the accounting is screwed, + * but all what can be done at this point is warn about it. + */ +void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk) +{ + unsigned int cpu; + + for_each_cpu(cpu, msk) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit, end = m->alloc_end; + + if (WARN_ON_ONCE(!cm->managed)) + continue; + + /* Get managed bit which are not allocated */ + bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); + + bit = find_first_bit(m->scratch_map, end); + if (WARN_ON_ONCE(bit >= end)) + continue; + + clear_bit(bit, cm->managed_map); + + cm->managed--; + if (cm->online) { + cm->available++; + m->global_available++; + } + trace_irq_matrix_remove_managed(bit, cpu, m, cm); + } +} + +/** + * irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map + * @m: Matrix pointer + * @msk: Which CPUs to search in + * @mapped_cpu: Pointer to store the CPU for which the irq was allocated + */ +int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, + unsigned int *mapped_cpu) +{ + unsigned int bit, cpu, end; + struct cpumap *cm; + + if (cpumask_empty(msk)) + return -EINVAL; + + cpu = matrix_find_best_cpu_managed(m, msk); + if (cpu == UINT_MAX) + return -ENOSPC; + + cm = per_cpu_ptr(m->maps, cpu); + end = m->alloc_end; + /* Get managed bit which are not allocated */ + bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); + bit = find_first_bit(m->scratch_map, end); + if (bit >= end) + return -ENOSPC; + set_bit(bit, cm->alloc_map); + cm->allocated++; + cm->managed_allocated++; + m->total_allocated++; + *mapped_cpu = cpu; + trace_irq_matrix_alloc_managed(bit, cpu, m, cm); + return bit; +} + +/** + * irq_matrix_assign - Assign a preallocated interrupt in the local CPU map + * @m: Matrix pointer + * @bit: Which bit to mark + * + * This should only be used to mark preallocated vectors + */ +void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) + return; + if (WARN_ON_ONCE(test_and_set_bit(bit, cm->alloc_map))) + return; + cm->allocated++; + m->total_allocated++; + cm->available--; + m->global_available--; + trace_irq_matrix_assign(bit, smp_processor_id(), m, cm); +} + +/** + * irq_matrix_reserve - Reserve interrupts + * @m: Matrix pointer + * + * This is merely a book keeping call. It increments the number of globally + * reserved interrupt bits w/o actually allocating them. This allows to + * setup interrupt descriptors w/o assigning low level resources to it. + * The actual allocation happens when the interrupt gets activated. + */ +void irq_matrix_reserve(struct irq_matrix *m) +{ + if (m->global_reserved == m->global_available) + pr_warn("Interrupt reservation exceeds available resources\n"); + + m->global_reserved++; + trace_irq_matrix_reserve(m); +} + +/** + * irq_matrix_remove_reserved - Remove interrupt reservation + * @m: Matrix pointer + * + * This is merely a book keeping call. It decrements the number of globally + * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the + * interrupt was never in use and a real vector allocated, which undid the + * reservation. + */ +void irq_matrix_remove_reserved(struct irq_matrix *m) +{ + m->global_reserved--; + trace_irq_matrix_remove_reserved(m); +} + +/** + * irq_matrix_alloc - Allocate a regular interrupt in a CPU map + * @m: Matrix pointer + * @msk: Which CPUs to search in + * @reserved: Allocate previously reserved interrupts + * @mapped_cpu: Pointer to store the CPU for which the irq was allocated + */ +int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, + bool reserved, unsigned int *mapped_cpu) +{ + unsigned int cpu, bit; + struct cpumap *cm; + + /* + * Not required in theory, but matrix_find_best_cpu() uses + * for_each_cpu() which ignores the cpumask on UP . + */ + if (cpumask_empty(msk)) + return -EINVAL; + + cpu = matrix_find_best_cpu(m, msk); + if (cpu == UINT_MAX) + return -ENOSPC; + + cm = per_cpu_ptr(m->maps, cpu); + bit = matrix_alloc_area(m, cm, 1, false); + if (bit >= m->alloc_end) + return -ENOSPC; + cm->allocated++; + cm->available--; + m->total_allocated++; + m->global_available--; + if (reserved) + m->global_reserved--; + *mapped_cpu = cpu; + trace_irq_matrix_alloc(bit, cpu, m, cm); + return bit; + +} + +/** + * irq_matrix_free - Free allocated interrupt in the matrix + * @m: Matrix pointer + * @cpu: Which CPU map needs be updated + * @bit: The bit to remove + * @managed: If true, the interrupt is managed and not accounted + * as available. + */ +void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, + unsigned int bit, bool managed) +{ + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) + return; + + if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) + return; + + cm->allocated--; + if(managed) + cm->managed_allocated--; + + if (cm->online) + m->total_allocated--; + + if (!managed) { + cm->available++; + if (cm->online) + m->global_available++; + } + trace_irq_matrix_free(bit, cpu, m, cm); +} + +/** + * irq_matrix_available - Get the number of globally available irqs + * @m: Pointer to the matrix to query + * @cpudown: If true, the local CPU is about to go down, adjust + * the number of available irqs accordingly + */ +unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + if (!cpudown) + return m->global_available; + return m->global_available - cm->available; +} + +/** + * irq_matrix_reserved - Get the number of globally reserved irqs + * @m: Pointer to the matrix to query + */ +unsigned int irq_matrix_reserved(struct irq_matrix *m) +{ + return m->global_reserved; +} + +/** + * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU + * @m: Pointer to the matrix to search + * + * This returns number of allocated non-managed interrupts. + */ +unsigned int irq_matrix_allocated(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + return cm->allocated - cm->managed_allocated; +} + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +/** + * irq_matrix_debug_show - Show detailed allocation information + * @sf: Pointer to the seq_file to print to + * @m: Pointer to the matrix allocator + * @ind: Indentation for the print format + * + * Note, this is a lockless snapshot. + */ +void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind) +{ + unsigned int nsys = bitmap_weight(m->system_map, m->matrix_bits); + int cpu; + + seq_printf(sf, "Online bitmaps: %6u\n", m->online_maps); + seq_printf(sf, "Global available: %6u\n", m->global_available); + seq_printf(sf, "Global reserved: %6u\n", m->global_reserved); + seq_printf(sf, "Total allocated: %6u\n", m->total_allocated); + seq_printf(sf, "System: %u: %*pbl\n", nsys, m->matrix_bits, + m->system_map); + seq_printf(sf, "%*s| CPU | avl | man | mac | act | vectors\n", ind, " "); + cpus_read_lock(); + for_each_online_cpu(cpu) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + seq_printf(sf, "%*s %4d %4u %4u %4u %4u %*pbl\n", ind, " ", + cpu, cm->available, cm->managed, + cm->managed_allocated, cm->allocated, + m->matrix_bits, cm->alloc_map); + } + cpus_read_unlock(); +} +#endif diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c new file mode 100644 index 0000000000..61ca924ef4 --- /dev/null +++ b/kernel/irq/migration.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/irq.h> +#include <linux/interrupt.h> + +#include "internals.h" + +/** + * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU + * @desc: Interrupt descriptor to clean up + * @force_clear: If set clear the move pending bit unconditionally. + * If not set, clear it only when the dying CPU is the + * last one in the pending mask. + * + * Returns true if the pending bit was set and the pending mask contains an + * online CPU other than the dying CPU. + */ +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) +{ + struct irq_data *data = irq_desc_get_irq_data(desc); + + if (!irqd_is_setaffinity_pending(data)) + return false; + + /* + * The outgoing CPU might be the last online target in a pending + * interrupt move. If that's the case clear the pending move bit. + */ + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) { + irqd_clr_move_pending(data); + return false; + } + if (force_clear) + irqd_clr_move_pending(data); + return true; +} + +void irq_move_masked_irq(struct irq_data *idata) +{ + struct irq_desc *desc = irq_data_to_desc(idata); + struct irq_data *data = &desc->irq_data; + struct irq_chip *chip = data->chip; + + if (likely(!irqd_is_setaffinity_pending(data))) + return; + + irqd_clr_move_pending(data); + + /* + * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. + */ + if (irqd_is_per_cpu(data)) { + WARN_ON(1); + return; + } + + if (unlikely(cpumask_empty(desc->pending_mask))) + return; + + if (!chip->irq_set_affinity) + return; + + assert_raw_spin_locked(&desc->lock); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is coming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + * + * For correct operation this depends on the caller + * masking the irqs. + */ + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) { + int ret; + + ret = irq_do_set_affinity(data, desc->pending_mask, false); + /* + * If the there is a cleanup pending in the underlying + * vector management, reschedule the move for the next + * interrupt. Leave desc->pending_mask intact. + */ + if (ret == -EBUSY) { + irqd_set_move_pending(data); + return; + } + } + cpumask_clear(desc->pending_mask); +} + +void __irq_move_irq(struct irq_data *idata) +{ + bool masked; + + /* + * Get top level irq_data when CONFIG_IRQ_DOMAIN_HIERARCHY is enabled, + * and it should be optimized away when CONFIG_IRQ_DOMAIN_HIERARCHY is + * disabled. So we avoid an "#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY" here. + */ + idata = irq_desc_get_irq_data(irq_data_to_desc(idata)); + + if (unlikely(irqd_irq_disabled(idata))) + return; + + /* + * Be careful vs. already masked interrupts. If this is a + * threaded interrupt with ONESHOT set, we can end up with an + * interrupt storm. + */ + masked = irqd_irq_masked(idata); + if (!masked) + idata->chip->irq_mask(idata); + irq_move_masked_irq(idata); + if (!masked) + idata->chip->irq_unmask(idata); +} diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c new file mode 100644 index 0000000000..79b4a58ba9 --- /dev/null +++ b/kernel/irq/msi.c @@ -0,0 +1,1667 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014 Intel Corp. + * Author: Jiang Liu <jiang.liu@linux.intel.com> + * + * This file is licensed under GPLv2. + * + * This file contains common code to support Message Signaled Interrupts for + * PCI compatible and non PCI compatible devices. + */ +#include <linux/types.h> +#include <linux/device.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/msi.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <linux/pci.h> + +#include "internals.h" + +/** + * struct msi_ctrl - MSI internal management control structure + * @domid: ID of the domain on which management operations should be done + * @first: First (hardware) slot index to operate on + * @last: Last (hardware) slot index to operate on + * @nirqs: The number of Linux interrupts to allocate. Can be larger + * than the range due to PCI/multi-MSI. + */ +struct msi_ctrl { + unsigned int domid; + unsigned int first; + unsigned int last; + unsigned int nirqs; +}; + +/* Invalid Xarray index which is outside of any searchable range */ +#define MSI_XA_MAX_INDEX (ULONG_MAX - 1) +/* The maximum domain size */ +#define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) + +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid); +static inline int msi_sysfs_create_group(struct device *dev); + + +/** + * msi_alloc_desc - Allocate an initialized msi_desc + * @dev: Pointer to the device for which this is allocated + * @nvec: The number of vectors used in this entry + * @affinity: Optional pointer to an affinity mask array size of @nvec + * + * If @affinity is not %NULL then an affinity array[@nvec] is allocated + * and the affinity masks and flags from @affinity are copied. + * + * Return: pointer to allocated &msi_desc on success or %NULL on failure + */ +static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, + const struct irq_affinity_desc *affinity) +{ + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + + if (!desc) + return NULL; + + desc->dev = dev; + desc->nvec_used = nvec; + if (affinity) { + desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL); + if (!desc->affinity) { + kfree(desc); + return NULL; + } + } + return desc; +} + +static void msi_free_desc(struct msi_desc *desc) +{ + kfree(desc->affinity); + kfree(desc); +} + +static int msi_insert_desc(struct device *dev, struct msi_desc *desc, + unsigned int domid, unsigned int index) +{ + struct msi_device_data *md = dev->msi.data; + struct xarray *xa = &md->__domains[domid].store; + unsigned int hwsize; + int ret; + + hwsize = msi_domain_get_hwsize(dev, domid); + + if (index == MSI_ANY_INDEX) { + struct xa_limit limit = { .min = 0, .max = hwsize - 1 }; + unsigned int index; + + /* Let the xarray allocate a free index within the limit */ + ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL); + if (ret) + goto fail; + + desc->msi_index = index; + return 0; + } else { + if (index >= hwsize) { + ret = -ERANGE; + goto fail; + } + + desc->msi_index = index; + ret = xa_insert(xa, index, desc, GFP_KERNEL); + if (ret) + goto fail; + return 0; + } +fail: + msi_free_desc(desc); + return ret; +} + +/** + * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and + * insert it at @init_desc->msi_index + * + * @dev: Pointer to the device for which the descriptor is allocated + * @domid: The id of the interrupt domain to which the desriptor is added + * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor + * + * Return: 0 on success or an appropriate failure code. + */ +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc) +{ + struct msi_desc *desc; + + lockdep_assert_held(&dev->msi.data->mutex); + + desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity); + if (!desc) + return -ENOMEM; + + /* Copy type specific data to the new descriptor. */ + desc->pci = init_desc->pci; + + return msi_insert_desc(dev, desc, domid, init_desc->msi_index); +} + +static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) +{ + switch (filter) { + case MSI_DESC_ALL: + return true; + case MSI_DESC_NOTASSOCIATED: + return !desc->irq; + case MSI_DESC_ASSOCIATED: + return !!desc->irq; + } + WARN_ON_ONCE(1); + return false; +} + +static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) +{ + unsigned int hwsize; + + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + (dev->msi.domain && + !dev->msi.data->__domains[ctrl->domid].domain))) + return false; + + hwsize = msi_domain_get_hwsize(dev, ctrl->domid); + if (WARN_ON_ONCE(ctrl->first > ctrl->last || + ctrl->first >= hwsize || + ctrl->last >= hwsize)) + return false; + return true; +} + +static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + struct xarray *xa; + unsigned long idx; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl->domid].store; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + xa_erase(xa, idx); + + /* Leak the descriptor when it is still referenced */ + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED))) + continue; + msi_free_desc(desc); + } +} + +/** + * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain + * @dev: Device for which to free the descriptors + * @domid: Id of the domain to operate on + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + + msi_domain_free_descs(dev, &ctrl); +} + +/** + * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors + * @dev: Pointer to the device for which the descriptors are allocated + * @ctrl: Allocation control struct + * + * Return: 0 on success or an appropriate failure code. + */ +static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + unsigned int idx; + int ret; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + for (idx = ctrl->first; idx <= ctrl->last; idx++) { + desc = msi_alloc_desc(dev, 1, NULL); + if (!desc) + goto fail_mem; + ret = msi_insert_desc(dev, desc, ctrl->domid, idx); + if (ret) + goto fail; + } + return 0; + +fail_mem: + ret = -ENOMEM; +fail: + msi_domain_free_descs(dev, ctrl); + return ret; +} + +void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) +{ + *msg = entry->msg; +} + +void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_get_msi_desc(irq); + + __get_cached_msi_msg(entry, msg); +} +EXPORT_SYMBOL_GPL(get_cached_msi_msg); + +static void msi_device_data_release(struct device *dev, void *res) +{ + struct msi_device_data *md = res; + int i; + + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + msi_remove_device_irq_domain(dev, i); + WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); + xa_destroy(&md->__domains[i].store); + } + dev->msi.data = NULL; +} + +/** + * msi_setup_device_data - Setup MSI device data + * @dev: Device for which MSI device data should be set up + * + * Return: 0 on success, appropriate error code otherwise + * + * This can be called more than once for @dev. If the MSI device data is + * already allocated the call succeeds. The allocated memory is + * automatically released when the device is destroyed. + */ +int msi_setup_device_data(struct device *dev) +{ + struct msi_device_data *md; + int ret, i; + + if (dev->msi.data) + return 0; + + md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL); + if (!md) + return -ENOMEM; + + ret = msi_sysfs_create_group(dev); + if (ret) { + devres_free(md); + return ret; + } + + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) + xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC); + + /* + * If @dev::msi::domain is set and is a global MSI domain, copy the + * pointer into the domain array so all code can operate on domain + * ids. The NULL pointer check is required to keep the legacy + * architecture specific PCI/MSI support working. + */ + if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain)) + md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain; + + mutex_init(&md->mutex); + dev->msi.data = md; + devres_add(dev, md); + return 0; +} + +/** + * msi_lock_descs - Lock the MSI descriptor storage of a device + * @dev: Device to operate on + */ +void msi_lock_descs(struct device *dev) +{ + mutex_lock(&dev->msi.data->mutex); +} +EXPORT_SYMBOL_GPL(msi_lock_descs); + +/** + * msi_unlock_descs - Unlock the MSI descriptor storage of a device + * @dev: Device to operate on + */ +void msi_unlock_descs(struct device *dev) +{ + /* Invalidate the index which was cached by the iterator */ + dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; + mutex_unlock(&dev->msi.data->mutex); +} +EXPORT_SYMBOL_GPL(msi_unlock_descs); + +static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, + enum msi_desc_filter filter) +{ + struct xarray *xa = &md->__domains[domid].store; + struct msi_desc *desc; + + xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { + if (msi_desc_match(desc, filter)) + return desc; + } + md->__iter_idx = MSI_XA_MAX_INDEX; + return NULL; +} + +/** + * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device + * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. + * @filter: Descriptor state filter + * + * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() + * must be invoked before the call. + * + * Return: Pointer to the first MSI descriptor matching the search + * criteria, NULL if none found. + */ +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) +{ + struct msi_device_data *md = dev->msi.data; + + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + lockdep_assert_held(&md->mutex); + + md->__iter_idx = 0; + return msi_find_desc(md, domid, filter); +} +EXPORT_SYMBOL_GPL(msi_domain_first_desc); + +/** + * msi_next_desc - Get the next MSI descriptor of a device + * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. + * @filter: Descriptor state filter + * + * The first invocation of msi_next_desc() has to be preceeded by a + * successful invocation of __msi_first_desc(). Consecutive invocations are + * only valid if the previous one was successful. All these operations have + * to be done within the same MSI mutex held region. + * + * Return: Pointer to the next MSI descriptor matching the search + * criteria, NULL if none found. + */ +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) +{ + struct msi_device_data *md = dev->msi.data; + + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + lockdep_assert_held(&md->mutex); + + if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX) + return NULL; + + md->__iter_idx++; + return msi_find_desc(md, domid, filter); +} +EXPORT_SYMBOL_GPL(msi_next_desc); + +/** + * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain + * @dev: Device to operate on + * @domid: Domain ID of the interrupt domain associated to the device + * @index: MSI interrupt index to look for (0-based) + * + * Return: The Linux interrupt number on success (> 0), 0 if not found + */ +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index) +{ + struct msi_desc *desc; + unsigned int ret = 0; + bool pcimsi = false; + struct xarray *xa; + + if (!dev->msi.data) + return 0; + + if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return 0; + + /* This check is only valid for the PCI default MSI domain */ + if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN) + pcimsi = to_pci_dev(dev)->msi_enabled; + + msi_lock_descs(dev); + xa = &dev->msi.data->__domains[domid].store; + desc = xa_load(xa, pcimsi ? 0 : index); + if (desc && desc->irq) { + /* + * PCI-MSI has only one descriptor for multiple interrupts. + * PCI-MSIX and platform MSI use a descriptor per + * interrupt. + */ + if (pcimsi) { + if (index < desc->nvec_used) + ret = desc->irq + index; + } else { + ret = desc->irq; + } + } + + msi_unlock_descs(dev); + return ret; +} +EXPORT_SYMBOL_GPL(msi_domain_get_virq); + +#ifdef CONFIG_SYSFS +static struct attribute *msi_dev_attrs[] = { + NULL +}; + +static const struct attribute_group msi_irqs_group = { + .name = "msi_irqs", + .attrs = msi_dev_attrs, +}; + +static inline int msi_sysfs_create_group(struct device *dev) +{ + return devm_device_add_group(dev, &msi_irqs_group); +} + +static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + /* MSI vs. MSIX is per device not per interrupt */ + bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false; + + return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi"); +} + +static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) +{ + struct device_attribute *attrs = desc->sysfs_attrs; + int i; + + if (!attrs) + return; + + desc->sysfs_attrs = NULL; + for (i = 0; i < desc->nvec_used; i++) { + if (attrs[i].show) + sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); + kfree(attrs[i].attr.name); + } + kfree(attrs); +} + +static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) +{ + struct device_attribute *attrs; + int ret, i; + + attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + desc->sysfs_attrs = attrs; + for (i = 0; i < desc->nvec_used; i++) { + sysfs_attr_init(&attrs[i].attr); + attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i); + if (!attrs[i].attr.name) { + ret = -ENOMEM; + goto fail; + } + + attrs[i].attr.mode = 0444; + attrs[i].show = msi_mode_show; + + ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); + if (ret) { + attrs[i].show = NULL; + goto fail; + } + } + return 0; + +fail: + msi_sysfs_remove_desc(dev, desc); + return ret; +} + +#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN) +/** + * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device + * @dev: The device (PCI, platform etc) which will get sysfs entries + */ +int msi_device_populate_sysfs(struct device *dev) +{ + struct msi_desc *desc; + int ret; + + msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { + if (desc->sysfs_attrs) + continue; + ret = msi_sysfs_populate_desc(dev, desc); + if (ret) + return ret; + } + return 0; +} + +/** + * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device + * @dev: The device (PCI, platform etc) for which to remove + * sysfs entries + */ +void msi_device_destroy_sysfs(struct device *dev) +{ + struct msi_desc *desc; + + msi_for_each_desc(desc, dev, MSI_DESC_ALL) + msi_sysfs_remove_desc(dev, desc); +} +#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */ +#else /* CONFIG_SYSFS */ +static inline int msi_sysfs_create_group(struct device *dev) { return 0; } +static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; } +static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } +#endif /* !CONFIG_SYSFS */ + +static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid) +{ + struct irq_domain *domain; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + domain = dev->msi.data->__domains[domid].domain; + if (!domain) + return NULL; + + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) + return NULL; + + return domain; +} + +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + domain = msi_get_device_domain(dev, domid); + if (domain) { + info = domain->host_data; + return info->hwsize; + } + /* No domain, default to MSI_XA_DOMAIN_SIZE */ + return MSI_XA_DOMAIN_SIZE; +} + +static inline void irq_chip_write_msi_msg(struct irq_data *data, + struct msi_msg *msg) +{ + data->chip->irq_write_msi_msg(data, msg); +} + +static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg) +{ + struct msi_domain_info *info = domain->host_data; + + /* + * If the MSI provider has messed with the second message and + * not advertized that it is level-capable, signal the breakage. + */ + WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) && + (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) && + (msg[1].address_lo || msg[1].address_hi || msg[1].data)); +} + +/** + * msi_domain_set_affinity - Generic affinity setter function for MSI domains + * @irq_data: The irq data associated to the interrupt + * @mask: The affinity mask to set + * @force: Flag to enforce setting (disable online checks) + * + * Intended to be used by MSI interrupt controllers which are + * implemented with hierarchical domains. + * + * Return: IRQ_SET_MASK_* result code + */ +int msi_domain_set_affinity(struct irq_data *irq_data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = irq_data->parent_data; + struct msi_msg msg[2] = { [1] = { }, }; + int ret; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { + BUG_ON(irq_chip_compose_msi_msg(irq_data, msg)); + msi_check_level(irq_data->domain, msg); + irq_chip_write_msi_msg(irq_data, msg); + } + + return ret; +} + +static int msi_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) +{ + struct msi_msg msg[2] = { [1] = { }, }; + + BUG_ON(irq_chip_compose_msi_msg(irq_data, msg)); + msi_check_level(irq_data->domain, msg); + irq_chip_write_msi_msg(irq_data, msg); + return 0; +} + +static void msi_domain_deactivate(struct irq_domain *domain, + struct irq_data *irq_data) +{ + struct msi_msg msg[2]; + + memset(msg, 0, sizeof(msg)); + irq_chip_write_msi_msg(irq_data, msg); +} + +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + irq_hw_number_t hwirq = ops->get_hwirq(info, arg); + int i, ret; + + if (irq_find_mapping(domain, hwirq) > 0) + return -EEXIST; + + if (domain->parent) { + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + } + + for (i = 0; i < nr_irqs; i++) { + ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg); + if (ret < 0) { + if (ops->msi_free) { + for (i--; i > 0; i--) + ops->msi_free(domain, info, virq + i); + } + irq_domain_free_irqs_top(domain, virq, nr_irqs); + return ret; + } + } + + return 0; +} + +static void msi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct msi_domain_info *info = domain->host_data; + int i; + + if (info->ops->msi_free) { + for (i = 0; i < nr_irqs; i++) + info->ops->msi_free(domain, info, virq + i); + } + irq_domain_free_irqs_top(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops msi_domain_ops = { + .alloc = msi_domain_alloc, + .free = msi_domain_free, + .activate = msi_domain_activate, + .deactivate = msi_domain_deactivate, +}; + +static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->hwirq; +} + +static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + memset(arg, 0, sizeof(*arg)); + return 0; +} + +static void msi_domain_ops_set_desc(msi_alloc_info_t *arg, + struct msi_desc *desc) +{ + arg->desc = desc; +} + +static int msi_domain_ops_init(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int virq, irq_hw_number_t hwirq, + msi_alloc_info_t *arg) +{ + irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip, + info->chip_data); + if (info->handler && info->handler_name) { + __irq_set_handler(virq, info->handler, 0, info->handler_name); + if (info->handler_data) + irq_set_handler_data(virq, info->handler_data); + } + return 0; +} + +static struct msi_domain_ops msi_domain_ops_default = { + .get_hwirq = msi_domain_ops_get_hwirq, + .msi_init = msi_domain_ops_init, + .msi_prepare = msi_domain_ops_prepare, + .set_desc = msi_domain_ops_set_desc, +}; + +static void msi_domain_update_dom_ops(struct msi_domain_info *info) +{ + struct msi_domain_ops *ops = info->ops; + + if (ops == NULL) { + info->ops = &msi_domain_ops_default; + return; + } + + if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS)) + return; + + if (ops->get_hwirq == NULL) + ops->get_hwirq = msi_domain_ops_default.get_hwirq; + if (ops->msi_init == NULL) + ops->msi_init = msi_domain_ops_default.msi_init; + if (ops->msi_prepare == NULL) + ops->msi_prepare = msi_domain_ops_default.msi_prepare; + if (ops->set_desc == NULL) + ops->set_desc = msi_domain_ops_default.set_desc; +} + +static void msi_domain_update_chip_ops(struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); + if (!chip->irq_set_affinity) + chip->irq_set_affinity = msi_domain_set_affinity; +} + +static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + unsigned int flags, + struct irq_domain *parent) +{ + struct irq_domain *domain; + + if (info->hwsize > MSI_XA_DOMAIN_SIZE) + return NULL; + + /* + * Hardware size 0 is valid for backwards compatibility and for + * domains which are not backed by a hardware table. Grant the + * maximum index space. + */ + if (!info->hwsize) + info->hwsize = MSI_XA_DOMAIN_SIZE; + + msi_domain_update_dom_ops(info); + if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) + msi_domain_update_chip_ops(info); + + domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0, + fwnode, &msi_domain_ops, info); + + if (domain) + irq_domain_update_bus_token(domain, info->bus_token); + + return domain; +} + +/** + * msi_create_irq_domain - Create an MSI interrupt domain + * @fwnode: Optional fwnode of the interrupt controller + * @info: MSI domain info + * @parent: Parent irq domain + * + * Return: pointer to the created &struct irq_domain or %NULL on failure + */ +struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + struct irq_domain *parent) +{ + return __msi_create_irq_domain(fwnode, info, 0, parent); +} + +/** + * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down + * in the domain hierarchy + * @dev: The device for which the domain should be created + * @domain: The domain in the hierarchy this op is being called on + * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to + * be created + * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE + * domain to be created + * + * Return: true on success, false otherwise + * + * This is the most complex problem of per device MSI domains and the + * underlying interrupt domain hierarchy: + * + * The device domain to be initialized requests the broadest feature set + * possible and the underlying domain hierarchy puts restrictions on it. + * + * That's trivial for a simple parent->child relationship, but it gets + * interesting with an intermediate domain: root->parent->child. The + * intermediate 'parent' can expand the capabilities which the 'root' + * domain is providing. So that creates a classic hen and egg problem: + * Which entity is doing the restrictions/expansions? + * + * One solution is to let the root domain handle the initialization that's + * why there is the @domain and the @msi_parent_domain pointer. + */ +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info) +{ + struct irq_domain *parent = domain->parent; + + if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops || + !parent->msi_parent_ops->init_dev_msi_info)) + return false; + + return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain, + msi_child_info); +} + +/** + * msi_create_device_irq_domain - Create a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + * @template: MSI domain info bundle used as template + * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited) + * @domain_data: Optional pointer to domain specific data which is set in + * msi_domain_info::data + * @chip_data: Optional pointer to chip specific data which is set in + * msi_domain_info::chip_data + * + * Return: True on success, false otherwise + * + * There is no firmware node required for this interface because the per + * device domains are software constructs which are actually closer to the + * hardware reality than any firmware can describe them. + * + * The domain name and the irq chip name for a MSI device domain are + * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" + * + * $PREFIX: Optional prefix provided by the underlying MSI parent domain + * via msi_parent_ops::prefix. If that pointer is NULL the prefix + * is empty. + * $CHIPNAME: The name of the irq_chip in @template + * $DEVNAME: The name of the device + * + * This results in understandable chip names and hardware interrupt numbers + * in e.g. /proc/interrupts + * + * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix + * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-' + * + * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect + * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device + * IR-PCI-MSIX-0000:3d:00.0 2-edge + * + * On IMS domains the hardware interrupt number is either a table entry + * index or a purely software managed index but it is guaranteed to be + * unique. + * + * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All + * subsequent operations on the domain depend on the domain id. + * + * The domain is automatically freed when the device is removed via devres + * in the context of @dev::msi::data freeing, but it can also be + * independently removed via @msi_remove_device_irq_domain(). + */ +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data) +{ + struct irq_domain *domain, *parent = dev->msi.domain; + const struct msi_parent_ops *pops; + struct msi_domain_template *bundle; + struct fwnode_handle *fwnode; + + if (!irq_domain_is_msi_parent(parent)) + return false; + + if (domid >= MSI_MAX_DEVICE_IRQDOMAINS) + return false; + + bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL); + if (!bundle) + return false; + + bundle->info.hwsize = hwsize; + bundle->info.chip = &bundle->chip; + bundle->info.ops = &bundle->ops; + bundle->info.data = domain_data; + bundle->info.chip_data = chip_data; + + pops = parent->msi_parent_ops; + snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s", + pops->prefix ? : "", bundle->chip.name, dev_name(dev)); + bundle->chip.name = bundle->name; + + fwnode = irq_domain_alloc_named_fwnode(bundle->name); + if (!fwnode) + goto free_bundle; + + if (msi_setup_device_data(dev)) + goto free_fwnode; + + msi_lock_descs(dev); + + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid))) + goto fail; + + if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info)) + goto fail; + + domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent); + if (!domain) + goto fail; + + domain->dev = dev; + dev->msi.data->__domains[domid].domain = domain; + msi_unlock_descs(dev); + return true; + +fail: + msi_unlock_descs(dev); +free_fwnode: + irq_domain_free_fwnode(fwnode); +free_bundle: + kfree(bundle); + return false; +} + +/** + * msi_remove_device_irq_domain - Free a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + */ +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) +{ + struct fwnode_handle *fwnode = NULL; + struct msi_domain_info *info; + struct irq_domain *domain; + + msi_lock_descs(dev); + + domain = msi_get_device_domain(dev, domid); + + if (!domain || !irq_domain_is_msi_device(domain)) + goto unlock; + + dev->msi.data->__domains[domid].domain = NULL; + info = domain->host_data; + if (irq_domain_is_msi_device(domain)) + fwnode = domain->fwnode; + irq_domain_remove(domain); + irq_domain_free_fwnode(fwnode); + kfree(container_of(info, struct msi_domain_template, info)); + +unlock: + msi_unlock_descs(dev); +} + +/** + * msi_match_device_irq_domain - Match a device irq domain against a bus token + * @dev: Pointer to the device + * @domid: Domain id + * @bus_token: Bus token to match against the domain bus token + * + * Return: True if device domain exists and bus tokens match. + */ +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + bool ret = false; + + msi_lock_descs(dev); + domain = msi_get_device_domain(dev, domid); + if (domain && irq_domain_is_msi_device(domain)) { + info = domain->host_data; + ret = info->bus_token == bus_token; + } + msi_unlock_descs(dev); + return ret; +} + +int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + + return ops->msi_prepare(domain, dev, nvec, arg); +} + +int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, + int virq_base, int nvec, msi_alloc_info_t *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .first = virq_base, + .last = virq_base + nvec - 1, + }; + struct msi_desc *desc; + struct xarray *xa; + int ret, virq; + + msi_lock_descs(dev); + + if (!msi_ctrl_valid(dev, &ctrl)) { + ret = -EINVAL; + goto unlock; + } + + ret = msi_domain_add_simple_msi_descs(dev, &ctrl); + if (ret) + goto unlock; + + xa = &dev->msi.data->__domains[ctrl.domid].store; + + for (virq = virq_base; virq < virq_base + nvec; virq++) { + desc = xa_load(xa, virq); + desc->irq = virq; + + ops->set_desc(arg, desc); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); + if (ret) + goto fail; + + irq_set_msi_desc(virq, desc); + } + msi_unlock_descs(dev); + return 0; + +fail: + for (--virq; virq >= virq_base; virq--) { + msi_domain_depopulate_descs(dev, virq, 1); + irq_domain_free_irqs_common(domain, virq, 1); + } + msi_domain_free_descs(dev, &ctrl); +unlock: + msi_unlock_descs(dev); + return ret; +} + +void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec) +{ + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .first = virq_base, + .last = virq_base + nvec - 1, + }; + struct msi_desc *desc; + struct xarray *xa; + unsigned long idx; + + if (!msi_ctrl_valid(dev, &ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl.domid].store; + xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last) + desc->irq = 0; +} + +/* + * Carefully check whether the device can use reservation mode. If + * reservation mode is enabled then the early activation will assign a + * dummy vector to the device. If the PCI/MSI device does not support + * masking of the entry then this can result in spurious interrupts when + * the device driver is not absolutely careful. But even then a malfunction + * of the hardware could result in a spurious interrupt on the dummy vector + * and render the device unusable. If the entry can be masked then the core + * logic will prevent the spurious interrupt and reservation mode can be + * used. For now reservation mode is restricted to PCI/MSI. + */ +static bool msi_check_reservation_mode(struct irq_domain *domain, + struct msi_domain_info *info, + struct device *dev) +{ + struct msi_desc *desc; + + switch(domain->bus_token) { + case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + case DOMAIN_BUS_VMD_MSI: + break; + default: + return false; + } + + if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) + return false; + + if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) + return false; + + /* + * Checking the first MSI descriptor is sufficient. MSIX supports + * masking and MSI does so when the can_mask attribute is set. + */ + desc = msi_first_desc(dev, MSI_DESC_ALL); + return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask; +} + +static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, + int allocated) +{ + switch(domain->bus_token) { + case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + case DOMAIN_BUS_VMD_MSI: + if (IS_ENABLED(CONFIG_PCI_MSI)) + break; + fallthrough; + default: + return -ENOSPC; + } + + /* Let a failed PCI multi MSI allocation retry */ + if (desc->nvec_used > 1) + return 1; + + /* If there was a successful allocation let the caller know */ + return allocated ? allocated : -ENOSPC; +} + +#define VIRQ_CAN_RESERVE 0x01 +#define VIRQ_ACTIVATE 0x02 + +static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags) +{ + struct irq_data *irqd = irq_domain_get_irq_data(domain, virq); + int ret; + + if (!(vflags & VIRQ_CAN_RESERVE)) { + irqd_clr_can_reserve(irqd); + + /* + * If the interrupt is managed but no CPU is available to + * service it, shut it down until better times. Note that + * we only do this on the !RESERVE path as x86 (the only + * architecture using this flag) deals with this in a + * different way by using a catch-all vector. + */ + if ((vflags & VIRQ_ACTIVATE) && + irqd_affinity_is_managed(irqd) && + !cpumask_intersects(irq_data_get_affinity_mask(irqd), + cpu_online_mask)) { + irqd_set_managed_shutdown(irqd); + return 0; + } + } + + if (!(vflags & VIRQ_ACTIVATE)) + return 0; + + ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE); + if (ret) + return ret; + /* + * If the interrupt uses reservation mode, clear the activated bit + * so request_irq() will assign the final vector. + */ + if (vflags & VIRQ_CAN_RESERVE) + irqd_clr_activated(irqd); + return 0; +} + +static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) +{ + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + unsigned int vflags = 0, allocated = 0; + msi_alloc_info_t arg = { }; + struct msi_desc *desc; + unsigned long idx; + int i, ret, virq; + + ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg); + if (ret) + return ret; + + /* + * This flag is set by the PCI layer as we need to activate + * the MSI entries before the PCI layer enables MSI in the + * card. Otherwise the card latches a random msi message. + */ + if (info->flags & MSI_FLAG_ACTIVATE_EARLY) + vflags |= VIRQ_ACTIVATE; + + /* + * Interrupt can use a reserved vector and will not occupy + * a real device vector until the interrupt is requested. + */ + if (msi_check_reservation_mode(domain, info, dev)) + vflags |= VIRQ_CAN_RESERVE; + + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) + continue; + + /* This should return -ECONFUSED... */ + if (WARN_ON_ONCE(allocated >= ctrl->nirqs)) + return -EINVAL; + + if (ops->prepare_desc) + ops->prepare_desc(domain, &arg, desc); + + ops->set_desc(&arg, desc); + + virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, + dev_to_node(dev), &arg, false, + desc->affinity); + if (virq < 0) + return msi_handle_pci_fail(domain, desc, allocated); + + for (i = 0; i < desc->nvec_used; i++) { + irq_set_msi_desc_off(virq, i, desc); + irq_debugfs_copy_devname(virq + i, dev); + ret = msi_init_virq(domain, virq + i, vflags); + if (ret) + return ret; + } + if (info->flags & MSI_FLAG_DEV_SYSFS) { + ret = msi_sysfs_populate_desc(dev, desc); + if (ret) + return ret; + } + allocated++; + } + return 0; +} + +static int msi_domain_alloc_simple_msi_descs(struct device *dev, + struct msi_domain_info *info, + struct msi_ctrl *ctrl) +{ + if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) + return 0; + + return msi_domain_add_simple_msi_descs(dev, ctrl); +} + +static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + int ret; + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return -ENODEV; + + info = domain->host_data; + + ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl); + if (ret) + return ret; + + ops = info->ops; + if (ops->domain_alloc_irqs) + return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs); + + return __msi_domain_alloc_irqs(dev, domain, ctrl); +} + +static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + int ret = __msi_domain_alloc_locked(dev, ctrl); + + if (ret) + msi_domain_free_locked(dev, ctrl); + return ret; +} + +/** + * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own descriptor + * allocation/free. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + .nirqs = last + 1 - first, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +/** + * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + int ret; + + msi_lock_descs(dev); + ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); + return ret; +} + +/** + * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @nirqs: The number of interrupts to allocate + * + * This function scans all MSI descriptors of the MSI domain and allocates interrupts + * for all unassigned ones. That function is to be used for MSI domain usage where + * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X]. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = 0, + .last = msi_domain_get_hwsize(dev, domid) - 1, + .nirqs = nirqs, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +/** + * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at + * a given index - or at the next free index + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation + * uses the next free index. + * @affdesc: Optional pointer to an interrupt affinity descriptor structure + * @icookie: Optional pointer to a domain specific per instance cookie. If + * non-NULL the content of the cookie is stored in msi_desc::data. + * Must be NULL for MSI-X allocations + * + * This requires a MSI interrupt domain which lets the core code manage the + * MSI descriptors. + * + * Return: struct msi_map + * + * On success msi_map::index contains the allocated index number and + * msi_map::virq the corresponding Linux interrupt number + * + * On failure msi_map::index contains the error code and msi_map::virq + * is %0. + */ +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) +{ + struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, }; + struct irq_domain *domain; + struct msi_map map = { }; + struct msi_desc *desc; + int ret; + + msi_lock_descs(dev); + domain = msi_get_device_domain(dev, domid); + if (!domain) { + map.index = -ENODEV; + goto unlock; + } + + desc = msi_alloc_desc(dev, 1, affdesc); + if (!desc) { + map.index = -ENOMEM; + goto unlock; + } + + if (icookie) + desc->data.icookie = *icookie; + + ret = msi_insert_desc(dev, desc, domid, index); + if (ret) { + map.index = ret; + goto unlock; + } + + ctrl.first = ctrl.last = desc->msi_index; + + ret = __msi_domain_alloc_irqs(dev, domain, &ctrl); + if (ret) { + map.index = ret; + msi_domain_free_locked(dev, &ctrl); + } else { + map.index = desc->msi_index; + map.virq = desc->irq; + } +unlock: + msi_unlock_descs(dev); + return map; +} + +static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) +{ + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; + struct msi_domain_info *info = domain->host_data; + struct irq_data *irqd; + struct msi_desc *desc; + unsigned long idx; + int i; + + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + /* Only handle MSI entries which have an interrupt associated */ + if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED)) + continue; + + /* Make sure all interrupts are deactivated */ + for (i = 0; i < desc->nvec_used; i++) { + irqd = irq_domain_get_irq_data(domain, desc->irq + i); + if (irqd && irqd_is_activated(irqd)) + irq_domain_deactivate_irq(irqd); + } + + irq_domain_free_irqs(desc->irq, desc->nvec_used); + if (info->flags & MSI_FLAG_DEV_SYSFS) + msi_sysfs_remove_desc(dev, desc); + desc->irq = 0; + } +} + +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return; + + info = domain->host_data; + ops = info->ops; + + if (ops->domain_free_irqs) + ops->domain_free_irqs(domain, dev); + else + __msi_domain_free_irqs(dev, domain, ctrl); + + if (ops->msi_post_free) + ops->msi_post_free(domain, dev); + + if (info->flags & MSI_FLAG_FREE_MSI_DESCS) + msi_domain_free_descs(dev, ctrl); +} + +/** + * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain + * associated to @dev with msi_lock held + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + msi_domain_free_locked(dev, &ctrl); +} + +/** + * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain + * associated to @dev + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); +} + +/** + * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own vector + * allocation. + */ +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) +{ + msi_domain_free_irqs_range_locked(dev, domid, 0, + msi_domain_get_hwsize(dev, domid) - 1); +} + +/** + * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + */ +void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_all_locked(dev, domid); + msi_unlock_descs(dev); +} + +/** + * msi_get_domain_info - Get the MSI interrupt domain info for @domain + * @domain: The interrupt domain to retrieve data from + * + * Return: the pointer to the msi_domain_info stored in @domain->host_data. + */ +struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) +{ + return (struct msi_domain_info *)domain->host_data; +} + +/** + * msi_device_has_isolated_msi - True if the device has isolated MSI + * @dev: The device to check + * + * Isolated MSI means that HW modeled by an irq_domain on the path from the + * initiating device to the CPU will validate that the MSI message specifies an + * interrupt number that the device is authorized to trigger. This must block + * devices from triggering interrupts they are not authorized to trigger. + * Currently authorization means the MSI vector is one assigned to the device. + * + * This is interesting for securing VFIO use cases where a rouge MSI (eg created + * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to + * impact outside its security domain, eg userspace triggering interrupts on + * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM + * triggering interrupts on another VM. + */ +bool msi_device_has_isolated_msi(struct device *dev) +{ + struct irq_domain *domain = dev_get_msi_domain(dev); + + for (; domain; domain = domain->parent) + if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) + return true; + return arch_is_isolated_msi(); +} +EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c new file mode 100644 index 0000000000..c556bc49d2 --- /dev/null +++ b/kernel/irq/pm.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file contains power management functions related to interrupts. + */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/suspend.h> +#include <linux/syscore_ops.h> + +#include "internals.h" + +bool irq_pm_check_wakeup(struct irq_desc *desc) +{ + if (irqd_is_wakeup_armed(&desc->irq_data)) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; + desc->depth++; + irq_disable(desc); + pm_system_irq_wakeup(irq_desc_get_irq(desc)); + return true; + } + return false; +} + +/* + * Called from __setup_irq() with desc->lock held after @action has + * been installed in the action chain. + */ +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions++; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth++; + + WARN_ON_ONCE(desc->force_resume_depth && + desc->force_resume_depth != desc->nr_actions); + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth++; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth++; + + WARN_ON_ONCE(desc->no_suspend_depth && + (desc->no_suspend_depth + + desc->cond_suspend_depth) != desc->nr_actions); +} + +/* + * Called from __free_irq() with desc->lock held after @action has + * been removed from the action chain. + */ +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions--; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth--; + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth--; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth--; +} + +static bool suspend_device_irq(struct irq_desc *desc) +{ + unsigned long chipflags = irq_desc_get_chip(desc)->flags; + struct irq_data *irqd = &desc->irq_data; + + if (!desc->action || irq_desc_is_chained(desc) || + desc->no_suspend_depth) + return false; + + if (irqd_is_wakeup_set(irqd)) { + irqd_set(irqd, IRQD_WAKEUP_ARMED); + + if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) && + irqd_irq_disabled(irqd)) { + /* + * Interrupt marked for wakeup is in disabled state. + * Enable interrupt here to unmask/enable in irqchip + * to be able to resume with such interrupts. + */ + __enable_irq(desc); + irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); + } + /* + * We return true here to force the caller to issue + * synchronize_irq(). We need to make sure that the + * IRQD_WAKEUP_ARMED is visible before we return from + * suspend_device_irqs(). + */ + return true; + } + + desc->istate |= IRQS_SUSPENDED; + __disable_irq(desc); + + /* + * Hardware which has no wakeup source configuration facility + * requires that the non wakeup interrupts are masked at the + * chip level. The chip implementation indicates that with + * IRQCHIP_MASK_ON_SUSPEND. + */ + if (chipflags & IRQCHIP_MASK_ON_SUSPEND) + mask_irq(desc); + return true; +} + +/** + * suspend_device_irqs - disable all currently enabled interrupt lines + * + * During system-wide suspend or hibernation device drivers need to be + * prevented from receiving interrupts and this function is provided + * for this purpose. + * + * So we disable all interrupts and mark them IRQS_SUSPENDED except + * for those which are unused, those which are marked as not + * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND + * set and those which are marked as active wakeup sources. + * + * The active wakeup sources are handled by the flow handler entry + * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the + * interrupt and notifies the pm core about the wakeup. + */ +void suspend_device_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + bool sync; + + if (irq_settings_is_nested_thread(desc)) + continue; + raw_spin_lock_irqsave(&desc->lock, flags); + sync = suspend_device_irq(desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + if (sync) + synchronize_irq(irq); + } +} + +static void resume_irq(struct irq_desc *desc) +{ + struct irq_data *irqd = &desc->irq_data; + + irqd_clear(irqd, IRQD_WAKEUP_ARMED); + + if (irqd_is_enabled_on_suspend(irqd)) { + /* + * Interrupt marked for wakeup was enabled during suspend + * entry. Disable such interrupts to restore them back to + * original state. + */ + __disable_irq(desc); + irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); + } + + if (desc->istate & IRQS_SUSPENDED) + goto resume; + + /* Force resume the interrupt? */ + if (!desc->force_resume_depth) + return; + + /* Pretend that it got disabled ! */ + desc->depth++; + irq_state_set_disabled(desc); + irq_state_set_masked(desc); +resume: + desc->istate &= ~IRQS_SUSPENDED; + __enable_irq(desc); +} + +static void resume_irqs(bool want_early) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + bool is_early = desc->action && + desc->action->flags & IRQF_EARLY_RESUME; + + if (!is_early && want_early) + continue; + if (irq_settings_is_nested_thread(desc)) + continue; + + raw_spin_lock_irqsave(&desc->lock, flags); + resume_irq(desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + } +} + +/** + * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup + * @irq: Interrupt to rearm + */ +void rearm_wake_irq(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + + if (!desc) + return; + + if (!(desc->istate & IRQS_SUSPENDED) || + !irqd_is_wakeup_set(&desc->irq_data)) + goto unlock; + + desc->istate &= ~IRQS_SUSPENDED; + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + __enable_irq(desc); + +unlock: + irq_put_desc_busunlock(desc, flags); +} + +/** + * irq_pm_syscore_resume - enable interrupt lines early + * + * Enable all interrupt lines with %IRQF_EARLY_RESUME set. + */ +static void irq_pm_syscore_resume(void) +{ + resume_irqs(true); +} + +static struct syscore_ops irq_pm_syscore_ops = { + .resume = irq_pm_syscore_resume, +}; + +static int __init irq_pm_init_ops(void) +{ + register_syscore_ops(&irq_pm_syscore_ops); + return 0; +} + +device_initcall(irq_pm_init_ops); + +/** + * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() + * + * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously + * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag + * set as well as those with %IRQF_FORCE_RESUME. + */ +void resume_device_irqs(void) +{ + resume_irqs(false); +} diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c new file mode 100644 index 0000000000..623b8136e9 --- /dev/null +++ b/kernel/irq/proc.c @@ -0,0 +1,538 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar + * + * This file contains the /proc/irq/ handling code. + */ + +#include <linux/irq.h> +#include <linux/gfp.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/mutex.h> + +#include "internals.h" + +/* + * Access rules: + * + * procfs protects read/write of /proc/irq/N/ files against a + * concurrent free of the interrupt descriptor. remove_proc_entry() + * immediately prevents new read/writes to happen and waits for + * already running read/write functions to complete. + * + * We remove the proc entries first and then delete the interrupt + * descriptor from the radix tree and free it. So it is guaranteed + * that irq_to_desc(N) is valid as long as the read/writes are + * permitted by procfs. + * + * The read from /proc/interrupts is a different problem because there + * is no protection. So the lookup and the access to irqdesc + * information must be protected by sparse_irq_lock. + */ +static struct proc_dir_entry *root_irq_dir; + +#ifdef CONFIG_SMP + +enum { + AFFINITY, + AFFINITY_LIST, + EFFECTIVE, + EFFECTIVE_LIST, +}; + +static int show_irq_affinity(int type, struct seq_file *m) +{ + struct irq_desc *desc = irq_to_desc((long)m->private); + const struct cpumask *mask; + + switch (type) { + case AFFINITY: + case AFFINITY_LIST: + mask = desc->irq_common_data.affinity; +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (irqd_is_setaffinity_pending(&desc->irq_data)) + mask = desc->pending_mask; +#endif + break; + case EFFECTIVE: + case EFFECTIVE_LIST: +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + mask = irq_data_get_effective_affinity_mask(&desc->irq_data); + break; +#endif + default: + return -EINVAL; + } + + switch (type) { + case AFFINITY_LIST: + case EFFECTIVE_LIST: + seq_printf(m, "%*pbl\n", cpumask_pr_args(mask)); + break; + case AFFINITY: + case EFFECTIVE: + seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); + break; + } + return 0; +} + +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) +{ + struct irq_desc *desc = irq_to_desc((long)m->private); + unsigned long flags; + cpumask_var_t mask; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + raw_spin_lock_irqsave(&desc->lock, flags); + if (desc->affinity_hint) + cpumask_copy(mask, desc->affinity_hint); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); + free_cpumask_var(mask); + + return 0; +} + +int no_irq_affinity; +static int irq_affinity_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(AFFINITY, m); +} + +static int irq_affinity_list_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(AFFINITY_LIST, m); +} + +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is started up already then this fails. The + * interrupt is assigned to an online CPU already. There is no + * point to move it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not then any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity() which will select + * a online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif + +static ssize_t write_irq_affinity(int type, struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + unsigned int irq = (int)(long)pde_data(file_inode(file)); + cpumask_var_t new_value; + int err; + + if (!irq_can_set_affinity_usr(irq) || no_irq_affinity) + return -EIO; + + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + + if (type) + err = cpumask_parselist_user(buffer, count, new_value); + else + err = cpumask_parse_user(buffer, count, new_value); + if (err) + goto free_cpumask; + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!cpumask_intersects(new_value, cpu_online_mask)) { + /* + * Special case for empty set - allow the architecture code + * to set default SMP affinity. + */ + err = irq_select_affinity_usr(irq) ? -EINVAL : count; + } else { + err = irq_set_affinity(irq, new_value); + if (!err) + err = count; + } + +free_cpumask: + free_cpumask_var(new_value); + return err; +} + +static ssize_t irq_affinity_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(0, file, buffer, count, pos); +} + +static ssize_t irq_affinity_list_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) +{ + return write_irq_affinity(1, file, buffer, count, pos); +} + +static int irq_affinity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_affinity_proc_show, pde_data(inode)); +} + +static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_affinity_list_proc_show, pde_data(inode)); +} + +static const struct proc_ops irq_affinity_proc_ops = { + .proc_open = irq_affinity_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = irq_affinity_proc_write, +}; + +static const struct proc_ops irq_affinity_list_proc_ops = { + .proc_open = irq_affinity_list_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = irq_affinity_list_proc_write, +}; + +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static int irq_effective_aff_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(EFFECTIVE, m); +} + +static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v) +{ + return show_irq_affinity(EFFECTIVE_LIST, m); +} +#endif + +static int default_affinity_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity)); + return 0; +} + +static ssize_t default_affinity_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + cpumask_var_t new_value; + int err; + + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + + err = cpumask_parse_user(buffer, count, new_value); + if (err) + goto out; + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!cpumask_intersects(new_value, cpu_online_mask)) { + err = -EINVAL; + goto out; + } + + cpumask_copy(irq_default_affinity, new_value); + err = count; + +out: + free_cpumask_var(new_value); + return err; +} + +static int default_affinity_open(struct inode *inode, struct file *file) +{ + return single_open(file, default_affinity_show, pde_data(inode)); +} + +static const struct proc_ops default_affinity_proc_ops = { + .proc_open = default_affinity_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = default_affinity_write, +}; + +static int irq_node_proc_show(struct seq_file *m, void *v) +{ + struct irq_desc *desc = irq_to_desc((long) m->private); + + seq_printf(m, "%d\n", irq_desc_get_node(desc)); + return 0; +} +#endif + +static int irq_spurious_proc_show(struct seq_file *m, void *v) +{ + struct irq_desc *desc = irq_to_desc((long) m->private); + + seq_printf(m, "count %u\n" "unhandled %u\n" "last_unhandled %u ms\n", + desc->irq_count, desc->irqs_unhandled, + jiffies_to_msecs(desc->last_unhandled)); + return 0; +} + +#define MAX_NAMELEN 128 + +static int name_unique(unsigned int irq, struct irqaction *new_action) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + unsigned long flags; + int ret = 1; + + raw_spin_lock_irqsave(&desc->lock, flags); + for_each_action_of_desc(desc, action) { + if ((action != new_action) && action->name && + !strcmp(new_action->name, action->name)) { + ret = 0; + break; + } + } + raw_spin_unlock_irqrestore(&desc->lock, flags); + return ret; +} + +void register_handler_proc(unsigned int irq, struct irqaction *action) +{ + char name [MAX_NAMELEN]; + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc->dir || action->dir || !action->name || + !name_unique(irq, action)) + return; + + snprintf(name, MAX_NAMELEN, "%s", action->name); + + /* create /proc/irq/1234/handler/ */ + action->dir = proc_mkdir(name, desc->dir); +} + +#undef MAX_NAMELEN + +#define MAX_NAMELEN 10 + +void register_irq_proc(unsigned int irq, struct irq_desc *desc) +{ + static DEFINE_MUTEX(register_lock); + void __maybe_unused *irqp = (void *)(unsigned long) irq; + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) + return; + + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + desc->dir = proc_mkdir(name, root_irq_dir); + if (!desc->dir) + goto out_unlock; + +#ifdef CONFIG_SMP + /* create /proc/irq/<irq>/smp_affinity */ + proc_create_data("smp_affinity", 0644, desc->dir, + &irq_affinity_proc_ops, irqp); + + /* create /proc/irq/<irq>/affinity_hint */ + proc_create_single_data("affinity_hint", 0444, desc->dir, + irq_affinity_hint_proc_show, irqp); + + /* create /proc/irq/<irq>/smp_affinity_list */ + proc_create_data("smp_affinity_list", 0644, desc->dir, + &irq_affinity_list_proc_ops, irqp); + + proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, + irqp); +# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + proc_create_single_data("effective_affinity", 0444, desc->dir, + irq_effective_aff_proc_show, irqp); + proc_create_single_data("effective_affinity_list", 0444, desc->dir, + irq_effective_aff_list_proc_show, irqp); +# endif +#endif + proc_create_single_data("spurious", 0444, desc->dir, + irq_spurious_proc_show, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); +} + +void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) +{ + char name [MAX_NAMELEN]; + + if (!root_irq_dir || !desc->dir) + return; +#ifdef CONFIG_SMP + remove_proc_entry("smp_affinity", desc->dir); + remove_proc_entry("affinity_hint", desc->dir); + remove_proc_entry("smp_affinity_list", desc->dir); + remove_proc_entry("node", desc->dir); +# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + remove_proc_entry("effective_affinity", desc->dir); + remove_proc_entry("effective_affinity_list", desc->dir); +# endif +#endif + remove_proc_entry("spurious", desc->dir); + + sprintf(name, "%u", irq); + remove_proc_entry(name, root_irq_dir); +} + +#undef MAX_NAMELEN + +void unregister_handler_proc(unsigned int irq, struct irqaction *action) +{ + proc_remove(action->dir); +} + +static void register_default_affinity_proc(void) +{ +#ifdef CONFIG_SMP + proc_create("irq/default_smp_affinity", 0644, NULL, + &default_affinity_proc_ops); +#endif +} + +void init_irq_proc(void) +{ + unsigned int irq; + struct irq_desc *desc; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", NULL); + if (!root_irq_dir) + return; + + register_default_affinity_proc(); + + /* + * Create entries for all existing IRQs. + */ + for_each_irq_desc(irq, desc) + register_irq_proc(irq, desc); +} + +#ifdef CONFIG_GENERIC_IRQ_SHOW + +int __weak arch_show_interrupts(struct seq_file *p, int prec) +{ + return 0; +} + +#ifndef ACTUAL_NR_IRQS +# define ACTUAL_NR_IRQS nr_irqs +#endif + +int show_interrupts(struct seq_file *p, void *v) +{ + static int prec; + + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > ACTUAL_NR_IRQS) + return 0; + + if (i == ACTUAL_NR_IRQS) + return arch_show_interrupts(p, prec); + + /* print header and calculate the width of the first column */ + if (i == 0) { + for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) + j *= 10; + + seq_printf(p, "%*s", prec + 8, ""); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d", j); + seq_putc(p, '\n'); + } + + rcu_read_lock(); + desc = irq_to_desc(i); + if (!desc || irq_settings_is_hidden(desc)) + goto outsparse; + + if (desc->kstat_irqs) { + for_each_online_cpu(j) + any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j)); + } + + if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) + goto outsparse; + + seq_printf(p, "%*d: ", prec, i); + for_each_online_cpu(j) + seq_printf(p, "%10u ", desc->kstat_irqs ? + *per_cpu_ptr(desc->kstat_irqs, j) : 0); + + raw_spin_lock_irqsave(&desc->lock, flags); + if (desc->irq_data.chip) { + if (desc->irq_data.chip->irq_print_chip) + desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); + else if (desc->irq_data.chip->name) + seq_printf(p, " %8s", desc->irq_data.chip->name); + else + seq_printf(p, " %8s", "-"); + } else { + seq_printf(p, " %8s", "None"); + } + if (desc->irq_data.domain) + seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); + else + seq_printf(p, " %*s", prec, ""); +#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL + seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); +#endif + if (desc->name) + seq_printf(p, "-%-8s", desc->name); + + action = desc->action; + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } + + seq_putc(p, '\n'); + raw_spin_unlock_irqrestore(&desc->lock, flags); +outsparse: + rcu_read_unlock(); + return 0; +} +#endif diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c new file mode 100644 index 0000000000..5f2c66860a --- /dev/null +++ b/kernel/irq/resend.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner + * + * This file contains the IRQ-resend code + * + * If the interrupt is waiting to be processed, we try to re-run it. + * We can't directly run it from here since the caller might be in an + * interrupt-protected region. Not all irq controller chips can + * retrigger interrupts at the hardware level, so in those cases + * we allow the resending of IRQs via a tasklet. + */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/interrupt.h> + +#include "internals.h" + +#ifdef CONFIG_HARDIRQS_SW_RESEND + +/* hlist_head to handle software resend of interrupts: */ +static HLIST_HEAD(irq_resend_list); +static DEFINE_RAW_SPINLOCK(irq_resend_lock); + +/* + * Run software resends of IRQ's + */ +static void resend_irqs(struct tasklet_struct *unused) +{ + struct irq_desc *desc; + + raw_spin_lock_irq(&irq_resend_lock); + while (!hlist_empty(&irq_resend_list)) { + desc = hlist_entry(irq_resend_list.first, struct irq_desc, + resend_node); + hlist_del_init(&desc->resend_node); + raw_spin_unlock(&irq_resend_lock); + desc->handle_irq(desc); + raw_spin_lock(&irq_resend_lock); + } + raw_spin_unlock_irq(&irq_resend_lock); +} + +/* Tasklet to handle resend: */ +static DECLARE_TASKLET(resend_tasklet, resend_irqs); + +static int irq_sw_resend(struct irq_desc *desc) +{ + /* + * Validate whether this interrupt can be safely injected from + * non interrupt context + */ + if (handle_enforce_irqctx(&desc->irq_data)) + return -EINVAL; + + /* + * If the interrupt is running in the thread context of the parent + * irq we need to be careful, because we cannot trigger it + * directly. + */ + if (irq_settings_is_nested_thread(desc)) { + /* + * If the parent_irq is valid, we retrigger the parent, + * otherwise we do nothing. + */ + if (!desc->parent_irq) + return -EINVAL; + + desc = irq_to_desc(desc->parent_irq); + if (!desc) + return -EINVAL; + } + + /* Add to resend_list and activate the softirq: */ + raw_spin_lock(&irq_resend_lock); + if (hlist_unhashed(&desc->resend_node)) + hlist_add_head(&desc->resend_node, &irq_resend_list); + raw_spin_unlock(&irq_resend_lock); + tasklet_schedule(&resend_tasklet); + return 0; +} + +void clear_irq_resend(struct irq_desc *desc) +{ + raw_spin_lock(&irq_resend_lock); + hlist_del_init(&desc->resend_node); + raw_spin_unlock(&irq_resend_lock); +} + +void irq_resend_init(struct irq_desc *desc) +{ + INIT_HLIST_NODE(&desc->resend_node); +} +#else +void clear_irq_resend(struct irq_desc *desc) {} +void irq_resend_init(struct irq_desc *desc) {} + +static int irq_sw_resend(struct irq_desc *desc) +{ + return -EINVAL; +} +#endif + +static int try_retrigger(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_retrigger) + return desc->irq_data.chip->irq_retrigger(&desc->irq_data); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irq_chip_retrigger_hierarchy(&desc->irq_data); +#else + return 0; +#endif +} + +/* + * IRQ resend + * + * Is called with interrupts disabled and desc->lock held. + */ +int check_irq_resend(struct irq_desc *desc, bool inject) +{ + int err = 0; + + /* + * We do not resend level type interrupts. Level type interrupts + * are resent by hardware when they are still active. Clear the + * pending bit so suspend/resume does not get confused. + */ + if (irq_settings_is_level(desc)) { + desc->istate &= ~IRQS_PENDING; + return -EINVAL; + } + + if (desc->istate & IRQS_REPLAY) + return -EBUSY; + + if (!(desc->istate & IRQS_PENDING) && !inject) + return 0; + + desc->istate &= ~IRQS_PENDING; + + if (!try_retrigger(desc)) + err = irq_sw_resend(desc); + + /* If the retrigger was successful, mark it with the REPLAY bit */ + if (!err) + desc->istate |= IRQS_REPLAY; + return err; +} + +#ifdef CONFIG_GENERIC_IRQ_INJECTION +/** + * irq_inject_interrupt - Inject an interrupt for testing/error injection + * @irq: The interrupt number + * + * This function must only be used for debug and testing purposes! + * + * Especially on x86 this can cause a premature completion of an interrupt + * affinity change causing the interrupt line to become stale. Very + * unlikely, but possible. + * + * The injection can fail for various reasons: + * - Interrupt is not activated + * - Interrupt is NMI type or currently replaying + * - Interrupt is level type + * - Interrupt does not support hardware retrigger and software resend is + * either not enabled or not possible for the interrupt. + */ +int irq_inject_interrupt(unsigned int irq) +{ + struct irq_desc *desc; + unsigned long flags; + int err; + + /* Try the state injection hardware interface first */ + if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true)) + return 0; + + /* That failed, try via the resend mechanism */ + desc = irq_get_desc_buslock(irq, &flags, 0); + if (!desc) + return -EINVAL; + + /* + * Only try to inject when the interrupt is: + * - not NMI type + * - activated + */ + if ((desc->istate & IRQS_NMI) || !irqd_is_activated(&desc->irq_data)) + err = -EINVAL; + else + err = check_irq_resend(desc, true); + + irq_put_desc_busunlock(desc, flags); + return err; +} +EXPORT_SYMBOL_GPL(irq_inject_interrupt); +#endif diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h new file mode 100644 index 0000000000..7b7efb1a11 --- /dev/null +++ b/kernel/irq/settings.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Internal header to deal with irq_desc->status which will be renamed + * to irq_desc->settings. + */ +enum { + _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS, + _IRQ_PER_CPU = IRQ_PER_CPU, + _IRQ_LEVEL = IRQ_LEVEL, + _IRQ_NOPROBE = IRQ_NOPROBE, + _IRQ_NOREQUEST = IRQ_NOREQUEST, + _IRQ_NOTHREAD = IRQ_NOTHREAD, + _IRQ_NOAUTOEN = IRQ_NOAUTOEN, + _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, + _IRQ_NO_BALANCING = IRQ_NO_BALANCING, + _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, + _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, + _IRQ_IS_POLLED = IRQ_IS_POLLED, + _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, + _IRQ_HIDDEN = IRQ_HIDDEN, + _IRQ_NO_DEBUG = IRQ_NO_DEBUG, + _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, +}; + +#define IRQ_PER_CPU GOT_YOU_MORON +#define IRQ_NO_BALANCING GOT_YOU_MORON +#define IRQ_LEVEL GOT_YOU_MORON +#define IRQ_NOPROBE GOT_YOU_MORON +#define IRQ_NOREQUEST GOT_YOU_MORON +#define IRQ_NOTHREAD GOT_YOU_MORON +#define IRQ_NOAUTOEN GOT_YOU_MORON +#define IRQ_NESTED_THREAD GOT_YOU_MORON +#define IRQ_PER_CPU_DEVID GOT_YOU_MORON +#define IRQ_IS_POLLED GOT_YOU_MORON +#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON +#define IRQ_HIDDEN GOT_YOU_MORON +#define IRQ_NO_DEBUG GOT_YOU_MORON +#undef IRQF_MODIFY_MASK +#define IRQF_MODIFY_MASK GOT_YOU_MORON + +static inline void +irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) +{ + desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); + desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); +} + +static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_PER_CPU; +} + +static inline bool irq_settings_is_per_cpu_devid(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_PER_CPU_DEVID; +} + +static inline void irq_settings_set_per_cpu(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_PER_CPU; +} + +static inline void irq_settings_set_no_balancing(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NO_BALANCING; +} + +static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_NO_BALANCING; +} + +static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) +{ + return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; +} + +static inline void +irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) +{ + desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; + desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; +} + +static inline bool irq_settings_is_level(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_LEVEL; +} + +static inline void irq_settings_clr_level(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_LEVEL; +} + +static inline void irq_settings_set_level(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_LEVEL; +} + +static inline bool irq_settings_can_request(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOREQUEST); +} + +static inline void irq_settings_clr_norequest(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOREQUEST; +} + +static inline void irq_settings_set_norequest(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOREQUEST; +} + +static inline bool irq_settings_can_thread(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOTHREAD); +} + +static inline void irq_settings_clr_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOTHREAD; +} + +static inline void irq_settings_set_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOTHREAD; +} + +static inline bool irq_settings_can_probe(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOPROBE); +} + +static inline void irq_settings_clr_noprobe(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOPROBE; +} + +static inline void irq_settings_set_noprobe(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOPROBE; +} + +static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; +} + +static inline bool irq_settings_can_autoenable(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOAUTOEN); +} + +static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_NESTED_THREAD; +} + +static inline bool irq_settings_is_polled(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_IS_POLLED; +} + +static inline bool irq_settings_disable_unlazy(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_DISABLE_UNLAZY; +} + +static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; +} + +static inline bool irq_settings_is_hidden(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_HIDDEN; +} + +static inline void irq_settings_set_no_debug(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NO_DEBUG; +} + +static inline bool irq_settings_no_debug(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_NO_DEBUG; +} diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c new file mode 100644 index 0000000000..02b2daf074 --- /dev/null +++ b/kernel/irq/spurious.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar + * + * This file contains spurious interrupt handling. + */ + +#include <linux/jiffies.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/moduleparam.h> +#include <linux/timer.h> + +#include "internals.h" + +static int irqfixup __read_mostly; + +#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) +static void poll_spurious_irqs(struct timer_list *unused); +static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); +static int irq_poll_cpu; +static atomic_t irq_poll_active; + +/* + * We wait here for a poller to finish. + * + * If the poll runs on this CPU, then we yell loudly and return + * false. That will leave the interrupt line disabled in the worst + * case, but it should never happen. + * + * We wait until the poller is done and then recheck disabled and + * action (about to be disabled). Only if it's still active, we return + * true and let the handler run. + */ +bool irq_wait_for_poll(struct irq_desc *desc) + __must_hold(&desc->lock) +{ + if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), + "irq poll in progress on cpu %d for irq %d\n", + smp_processor_id(), desc->irq_data.irq)) + return false; + +#ifdef CONFIG_SMP + do { + raw_spin_unlock(&desc->lock); + while (irqd_irq_inprogress(&desc->irq_data)) + cpu_relax(); + raw_spin_lock(&desc->lock); + } while (irqd_irq_inprogress(&desc->irq_data)); + /* Might have been disabled in meantime */ + return !irqd_irq_disabled(&desc->irq_data) && desc->action; +#else + return false; +#endif +} + + +/* + * Recovery handler for misrouted interrupts. + */ +static int try_one_irq(struct irq_desc *desc, bool force) +{ + irqreturn_t ret = IRQ_NONE; + struct irqaction *action; + + raw_spin_lock(&desc->lock); + + /* + * PER_CPU, nested thread interrupts and interrupts explicitly + * marked polled are excluded from polling. + */ + if (irq_settings_is_per_cpu(desc) || + irq_settings_is_nested_thread(desc) || + irq_settings_is_polled(desc)) + goto out; + + /* + * Do not poll disabled interrupts unless the spurious + * disabled poller asks explicitly. + */ + if (irqd_irq_disabled(&desc->irq_data) && !force) + goto out; + + /* + * All handlers must agree on IRQF_SHARED, so we test just the + * first. + */ + action = desc->action; + if (!action || !(action->flags & IRQF_SHARED) || + (action->flags & __IRQF_TIMER)) + goto out; + + /* Already running on another processor */ + if (irqd_irq_inprogress(&desc->irq_data)) { + /* + * Already running: If it is shared get the other + * CPU to go looking for our mystery interrupt too + */ + desc->istate |= IRQS_PENDING; + goto out; + } + + /* Mark it poll in progress */ + desc->istate |= IRQS_POLL_INPROGRESS; + do { + if (handle_irq_event(desc) == IRQ_HANDLED) + ret = IRQ_HANDLED; + /* Make sure that there is still a valid action */ + action = desc->action; + } while ((desc->istate & IRQS_PENDING) && action); + desc->istate &= ~IRQS_POLL_INPROGRESS; +out: + raw_spin_unlock(&desc->lock); + return ret == IRQ_HANDLED; +} + +static int misrouted_irq(int irq) +{ + struct irq_desc *desc; + int i, ok = 0; + + if (atomic_inc_return(&irq_poll_active) != 1) + goto out; + + irq_poll_cpu = smp_processor_id(); + + for_each_irq_desc(i, desc) { + if (!i) + continue; + + if (i == irq) /* Already tried */ + continue; + + if (try_one_irq(desc, false)) + ok = 1; + } +out: + atomic_dec(&irq_poll_active); + /* So the caller can adjust the irq error counts */ + return ok; +} + +static void poll_spurious_irqs(struct timer_list *unused) +{ + struct irq_desc *desc; + int i; + + if (atomic_inc_return(&irq_poll_active) != 1) + goto out; + irq_poll_cpu = smp_processor_id(); + + for_each_irq_desc(i, desc) { + unsigned int state; + + if (!i) + continue; + + /* Racy but it doesn't matter */ + state = desc->istate; + barrier(); + if (!(state & IRQS_SPURIOUS_DISABLED)) + continue; + + local_irq_disable(); + try_one_irq(desc, true); + local_irq_enable(); + } +out: + atomic_dec(&irq_poll_active); + mod_timer(&poll_spurious_irq_timer, + jiffies + POLL_SPURIOUS_IRQ_INTERVAL); +} + +static inline int bad_action_ret(irqreturn_t action_ret) +{ + unsigned int r = action_ret; + + if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD))) + return 0; + return 1; +} + +/* + * If 99,900 of the previous 100,000 interrupts have not been handled + * then assume that the IRQ is stuck in some manner. Drop a diagnostic + * and try to turn the IRQ off. + * + * (The other 100-of-100,000 interrupts may have been a correctly + * functioning device sharing an IRQ with the failing one) + */ +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) +{ + unsigned int irq = irq_desc_get_irq(desc); + struct irqaction *action; + unsigned long flags; + + if (bad_action_ret(action_ret)) { + printk(KERN_ERR "irq event %d: bogus return value %x\n", + irq, action_ret); + } else { + printk(KERN_ERR "irq %d: nobody cared (try booting with " + "the \"irqpoll\" option)\n", irq); + } + dump_stack(); + printk(KERN_ERR "handlers:\n"); + + /* + * We need to take desc->lock here. note_interrupt() is called + * w/o desc->lock held, but IRQ_PROGRESS set. We might race + * with something else removing an action. It's ok to take + * desc->lock here. See synchronize_irq(). + */ + raw_spin_lock_irqsave(&desc->lock, flags); + for_each_action_of_desc(desc, action) { + printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); + if (action->thread_fn) + printk(KERN_CONT " threaded [<%p>] %ps", + action->thread_fn, action->thread_fn); + printk(KERN_CONT "\n"); + } + raw_spin_unlock_irqrestore(&desc->lock, flags); +} + +static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) +{ + static int count = 100; + + if (count > 0) { + count--; + __report_bad_irq(desc, action_ret); + } +} + +static inline int +try_misrouted_irq(unsigned int irq, struct irq_desc *desc, + irqreturn_t action_ret) +{ + struct irqaction *action; + + if (!irqfixup) + return 0; + + /* We didn't actually handle the IRQ - see if it was misrouted? */ + if (action_ret == IRQ_NONE) + return 1; + + /* + * But for 'irqfixup == 2' we also do it for handled interrupts if + * they are marked as IRQF_IRQPOLL (or for irq zero, which is the + * traditional PC timer interrupt.. Legacy) + */ + if (irqfixup < 2) + return 0; + + if (!irq) + return 1; + + /* + * Since we don't get the descriptor lock, "action" can + * change under us. We don't really care, but we don't + * want to follow a NULL pointer. So tell the compiler to + * just load it once by using a barrier. + */ + action = desc->action; + barrier(); + return action && (action->flags & IRQF_IRQPOLL); +} + +#define SPURIOUS_DEFERRED 0x80000000 + +void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) +{ + unsigned int irq; + + if (desc->istate & IRQS_POLL_INPROGRESS || + irq_settings_is_polled(desc)) + return; + + if (bad_action_ret(action_ret)) { + report_bad_irq(desc, action_ret); + return; + } + + /* + * We cannot call note_interrupt from the threaded handler + * because we need to look at the compound of all handlers + * (primary and threaded). Aside of that in the threaded + * shared case we have no serialization against an incoming + * hardware interrupt while we are dealing with a threaded + * result. + * + * So in case a thread is woken, we just note the fact and + * defer the analysis to the next hardware interrupt. + * + * The threaded handlers store whether they successfully + * handled an interrupt and we check whether that number + * changed versus the last invocation. + * + * We could handle all interrupts with the delayed by one + * mechanism, but for the non forced threaded case we'd just + * add pointless overhead to the straight hardirq interrupts + * for the sake of a few lines less code. + */ + if (action_ret & IRQ_WAKE_THREAD) { + /* + * There is a thread woken. Check whether one of the + * shared primary handlers returned IRQ_HANDLED. If + * not we defer the spurious detection to the next + * interrupt. + */ + if (action_ret == IRQ_WAKE_THREAD) { + int handled; + /* + * We use bit 31 of thread_handled_last to + * denote the deferred spurious detection + * active. No locking necessary as + * thread_handled_last is only accessed here + * and we have the guarantee that hard + * interrupts are not reentrant. + */ + if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) { + desc->threads_handled_last |= SPURIOUS_DEFERRED; + return; + } + /* + * Check whether one of the threaded handlers + * returned IRQ_HANDLED since the last + * interrupt happened. + * + * For simplicity we just set bit 31, as it is + * set in threads_handled_last as well. So we + * avoid extra masking. And we really do not + * care about the high bits of the handled + * count. We just care about the count being + * different than the one we saw before. + */ + handled = atomic_read(&desc->threads_handled); + handled |= SPURIOUS_DEFERRED; + if (handled != desc->threads_handled_last) { + action_ret = IRQ_HANDLED; + /* + * Note: We keep the SPURIOUS_DEFERRED + * bit set. We are handling the + * previous invocation right now. + * Keep it for the current one, so the + * next hardware interrupt will + * account for it. + */ + desc->threads_handled_last = handled; + } else { + /* + * None of the threaded handlers felt + * responsible for the last interrupt + * + * We keep the SPURIOUS_DEFERRED bit + * set in threads_handled_last as we + * need to account for the current + * interrupt as well. + */ + action_ret = IRQ_NONE; + } + } else { + /* + * One of the primary handlers returned + * IRQ_HANDLED. So we don't care about the + * threaded handlers on the same line. Clear + * the deferred detection bit. + * + * In theory we could/should check whether the + * deferred bit is set and take the result of + * the previous run into account here as + * well. But it's really not worth the + * trouble. If every other interrupt is + * handled we never trigger the spurious + * detector. And if this is just the one out + * of 100k unhandled ones which is handled + * then we merily delay the spurious detection + * by one hard interrupt. Not a real problem. + */ + desc->threads_handled_last &= ~SPURIOUS_DEFERRED; + } + } + + if (unlikely(action_ret == IRQ_NONE)) { + /* + * If we are seeing only the odd spurious IRQ caused by + * bus asynchronicity then don't eventually trigger an error, + * otherwise the counter becomes a doomsday timer for otherwise + * working systems + */ + if (time_after(jiffies, desc->last_unhandled + HZ/10)) + desc->irqs_unhandled = 1; + else + desc->irqs_unhandled++; + desc->last_unhandled = jiffies; + } + + irq = irq_desc_get_irq(desc); + if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { + int ok = misrouted_irq(irq); + if (action_ret == IRQ_NONE) + desc->irqs_unhandled -= ok; + } + + if (likely(!desc->irqs_unhandled)) + return; + + /* Now getting into unhandled irq detection */ + desc->irq_count++; + if (likely(desc->irq_count < 100000)) + return; + + desc->irq_count = 0; + if (unlikely(desc->irqs_unhandled > 99900)) { + /* + * The interrupt is stuck + */ + __report_bad_irq(desc, action_ret); + /* + * Now kill the IRQ + */ + printk(KERN_EMERG "Disabling IRQ #%d\n", irq); + desc->istate |= IRQS_SPURIOUS_DISABLED; + desc->depth++; + irq_disable(desc); + + mod_timer(&poll_spurious_irq_timer, + jiffies + POLL_SPURIOUS_IRQ_INTERVAL); + } + desc->irqs_unhandled = 0; +} + +bool noirqdebug __read_mostly; + +int noirqdebug_setup(char *str) +{ + noirqdebug = 1; + printk(KERN_INFO "IRQ lockup detection disabled\n"); + + return 1; +} + +__setup("noirqdebug", noirqdebug_setup); +module_param(noirqdebug, bool, 0644); +MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); + +static int __init irqfixup_setup(char *str) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); + return 1; + } + irqfixup = 1; + printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); + printk(KERN_WARNING "This may impact system performance.\n"); + + return 1; +} + +__setup("irqfixup", irqfixup_setup); +module_param(irqfixup, int, 0644); + +static int __init irqpoll_setup(char *str) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); + return 1; + } + irqfixup = 2; + printk(KERN_WARNING "Misrouted IRQ fixup and polling support " + "enabled\n"); + printk(KERN_WARNING "This may significantly impact system " + "performance\n"); + return 1; +} + +__setup("irqpoll", irqpoll_setup); diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c new file mode 100644 index 0000000000..c43e2ac2f8 --- /dev/null +++ b/kernel/irq/timings.c @@ -0,0 +1,958 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define pr_fmt(fmt) "irq_timings: " fmt + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/static_key.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/idr.h> +#include <linux/irq.h> +#include <linux/math64.h> +#include <linux/log2.h> + +#include <trace/events/irq.h> + +#include "internals.h" + +DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); + +DEFINE_PER_CPU(struct irq_timings, irq_timings); + +static DEFINE_IDR(irqt_stats); + +void irq_timings_enable(void) +{ + static_branch_enable(&irq_timing_enabled); +} + +void irq_timings_disable(void) +{ + static_branch_disable(&irq_timing_enabled); +} + +/* + * The main goal of this algorithm is to predict the next interrupt + * occurrence on the current CPU. + * + * Currently, the interrupt timings are stored in a circular array + * buffer every time there is an interrupt, as a tuple: the interrupt + * number and the associated timestamp when the event occurred <irq, + * timestamp>. + * + * For every interrupt occurring in a short period of time, we can + * measure the elapsed time between the occurrences for the same + * interrupt and we end up with a suite of intervals. The experience + * showed the interrupts are often coming following a periodic + * pattern. + * + * The objective of the algorithm is to find out this periodic pattern + * in a fastest way and use its period to predict the next irq event. + * + * When the next interrupt event is requested, we are in the situation + * where the interrupts are disabled and the circular buffer + * containing the timings is filled with the events which happened + * after the previous next-interrupt-event request. + * + * At this point, we read the circular buffer and we fill the irq + * related statistics structure. After this step, the circular array + * containing the timings is empty because all the values are + * dispatched in their corresponding buffers. + * + * Now for each interrupt, we can predict the next event by using the + * suffix array, log interval and exponential moving average + * + * 1. Suffix array + * + * Suffix array is an array of all the suffixes of a string. It is + * widely used as a data structure for compression, text search, ... + * For instance for the word 'banana', the suffixes will be: 'banana' + * 'anana' 'nana' 'ana' 'na' 'a' + * + * Usually, the suffix array is sorted but for our purpose it is + * not necessary and won't provide any improvement in the context of + * the solved problem where we clearly define the boundaries of the + * search by a max period and min period. + * + * The suffix array will build a suite of intervals of different + * length and will look for the repetition of each suite. If the suite + * is repeating then we have the period because it is the length of + * the suite whatever its position in the buffer. + * + * 2. Log interval + * + * We saw the irq timings allow to compute the interval of the + * occurrences for a specific interrupt. We can reasonably assume the + * longer is the interval, the higher is the error for the next event + * and we can consider storing those interval values into an array + * where each slot in the array correspond to an interval at the power + * of 2 of the index. For example, index 12 will contain values + * between 2^11 and 2^12. + * + * At the end we have an array of values where at each index defines a + * [2^index - 1, 2 ^ index] interval values allowing to store a large + * number of values inside a small array. + * + * For example, if we have the value 1123, then we store it at + * ilog2(1123) = 10 index value. + * + * Storing those value at the specific index is done by computing an + * exponential moving average for this specific slot. For instance, + * for values 1800, 1123, 1453, ... fall under the same slot (10) and + * the exponential moving average is computed every time a new value + * is stored at this slot. + * + * 3. Exponential Moving Average + * + * The EMA is largely used to track a signal for stocks or as a low + * pass filter. The magic of the formula, is it is very simple and the + * reactivity of the average can be tuned with the factors called + * alpha. + * + * The higher the alphas are, the faster the average respond to the + * signal change. In our case, if a slot in the array is a big + * interval, we can have numbers with a big difference between + * them. The impact of those differences in the average computation + * can be tuned by changing the alpha value. + * + * + * -- The algorithm -- + * + * We saw the different processing above, now let's see how they are + * used together. + * + * For each interrupt: + * For each interval: + * Compute the index = ilog2(interval) + * Compute a new_ema(buffer[index], interval) + * Store the index in a circular buffer + * + * Compute the suffix array of the indexes + * + * For each suffix: + * If the suffix is reverse-found 3 times + * Return suffix + * + * Return Not found + * + * However we can not have endless suffix array to be build, it won't + * make sense and it will add an extra overhead, so we can restrict + * this to a maximum suffix length of 5 and a minimum suffix length of + * 2. The experience showed 5 is the majority of the maximum pattern + * period found for different devices. + * + * The result is a pattern finding less than 1us for an interrupt. + * + * Example based on real values: + * + * Example 1 : MMC write/read interrupt interval: + * + * 223947, 1240, 1384, 1386, 1386, + * 217416, 1236, 1384, 1386, 1387, + * 214719, 1241, 1386, 1387, 1384, + * 213696, 1234, 1384, 1386, 1388, + * 219904, 1240, 1385, 1389, 1385, + * 212240, 1240, 1386, 1386, 1386, + * 214415, 1236, 1384, 1386, 1387, + * 214276, 1234, 1384, 1388, ? + * + * For each element, apply ilog2(value) + * + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? + * + * Max period of 5, we take the last (max_period * 3) 15 elements as + * we can be confident if the pattern repeats itself three times it is + * a repeating pattern. + * + * 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? + * + * Suffixes are: + * + * 1) 8, 15, 8, 8, 8 <- max period + * 2) 8, 15, 8, 8 + * 3) 8, 15, 8 + * 4) 8, 15 <- min period + * + * From there we search the repeating pattern for each suffix. + * + * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 + * | | | | | | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | + * 8, 15, 8, 8, 8 + * + * When moving the suffix, we found exactly 3 matches. + * + * The first suffix with period 5 is repeating. + * + * The next event is (3 * max_period) % suffix_period + * + * In this example, the result 0, so the next event is suffix[0] => 8 + * + * However, 8 is the index in the array of exponential moving average + * which was calculated on the fly when storing the values, so the + * interval is ema[8] = 1366 + * + * + * Example 2: + * + * 4, 3, 5, 100, + * 3, 3, 5, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 5, 3, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 4, 5, 112, + * 4, 3, 4, 110 + * + * ilog2 + * + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Max period 5: + * 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Suffixes: + * + * 1) 0, 0, 4, 0, 0 + * 2) 0, 0, 4, 0 + * 3) 0, 0, 4 + * 4) 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | X + * 0, 0, 4, 0, 0, | X + * 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | + * 0, 0, 4, 0, | | | + * 0 0 4 + * + * Pattern is found 3 times, the remaining is 1 which results from + * (max_period * 3) % suffix_period. This value is the index in the + * suffix arrays. The suffix array for a period 4 has the value 4 + * at index 1. + */ +#define EMA_ALPHA_VAL 64 +#define EMA_ALPHA_SHIFT 7 + +#define PREDICTION_PERIOD_MIN 3 +#define PREDICTION_PERIOD_MAX 5 +#define PREDICTION_FACTOR 4 +#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ +#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ + +/* + * Number of elements in the circular buffer: If it happens it was + * flushed before, then the number of elements could be smaller than + * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is + * used as we wrapped. The index begins from zero when we did not + * wrap. That could be done in a nicer way with the proper circular + * array structure type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + */ +#define for_each_irqts(i, irqts) \ + for (i = irqts->count < IRQ_TIMINGS_SIZE ? \ + 0 : irqts->count & IRQ_TIMINGS_MASK, \ + irqts->count = min(IRQ_TIMINGS_SIZE, \ + irqts->count); \ + irqts->count > 0; irqts->count--, \ + i = (i + 1) & IRQ_TIMINGS_MASK) + +struct irqt_stat { + u64 last_ts; + u64 ema_time[PREDICTION_BUFFER_SIZE]; + int timings[IRQ_TIMINGS_SIZE]; + int circ_timings[IRQ_TIMINGS_SIZE]; + int count; +}; + +/* + * Exponential moving average computation + */ +static u64 irq_timings_ema_new(u64 value, u64 ema_old) +{ + s64 diff; + + if (unlikely(!ema_old)) + return value; + + diff = (value - ema_old) * EMA_ALPHA_VAL; + /* + * We can use a s64 type variable to be added with the u64 + * ema_old variable as this one will never have its topmost + * bit set, it will be always smaller than 2^63 nanosec + * interrupt interval (292 years). + */ + return ema_old + (diff >> EMA_ALPHA_SHIFT); +} + +static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) +{ + int period; + + /* + * Move the beginning pointer to the end minus the max period x 3. + * We are at the point we can begin searching the pattern + */ + buffer = &buffer[len - (period_max * 3)]; + + /* Adjust the length to the maximum allowed period x 3 */ + len = period_max * 3; + + /* + * The buffer contains the suite of intervals, in a ilog2 + * basis, we are looking for a repetition. We point the + * beginning of the search three times the length of the + * period beginning at the end of the buffer. We do that for + * each suffix. + */ + for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { + + /* + * The first comparison always succeed because the + * suffix is deduced from the first n-period bytes of + * the buffer and we compare the initial suffix with + * itself, so we can skip the first iteration. + */ + int idx = period; + size_t size = period; + + /* + * We look if the suite with period 'i' repeat + * itself. If it is truncated at the end, as it + * repeats we can use the period to find out the next + * element with the modulo. + */ + while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) { + + /* + * Move the index in a period basis + */ + idx += size; + + /* + * If this condition is reached, all previous + * memcmp were successful, so the period is + * found. + */ + if (idx == len) + return buffer[len % period]; + + /* + * If the remaining elements to compare are + * smaller than the period, readjust the size + * of the comparison for the last iteration. + */ + if (len - idx < period) + size = len - idx; + } + } + + return -1; +} + +static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) +{ + int index, i, period_max, count, start, min = INT_MAX; + + if ((now - irqs->last_ts) >= NSEC_PER_SEC) { + irqs->count = irqs->last_ts = 0; + return U64_MAX; + } + + /* + * As we want to find three times the repetition, we need a + * number of intervals greater or equal to three times the + * maximum period, otherwise we truncate the max period. + */ + period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : irqs->count / 3; + + /* + * If we don't have enough irq timings for this prediction, + * just bail out. + */ + if (period_max <= PREDICTION_PERIOD_MIN) + return U64_MAX; + + /* + * 'count' will depends if the circular buffer wrapped or not + */ + count = irqs->count < IRQ_TIMINGS_SIZE ? + irqs->count : IRQ_TIMINGS_SIZE; + + start = irqs->count < IRQ_TIMINGS_SIZE ? + 0 : (irqs->count & IRQ_TIMINGS_MASK); + + /* + * Copy the content of the circular buffer into another buffer + * in order to linearize the buffer instead of dealing with + * wrapping indexes and shifted array which will be prone to + * error and extremely difficult to debug. + */ + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + + irqs->timings[i] = irqs->circ_timings[index]; + min = min_t(int, irqs->timings[i], min); + } + + index = irq_timings_next_event_index(irqs->timings, count, period_max); + if (index < 0) + return irqs->last_ts + irqs->ema_time[min]; + + return irqs->last_ts + irqs->ema_time[index]; +} + +static __always_inline int irq_timings_interval_index(u64 interval) +{ + /* + * The PREDICTION_FACTOR increase the interval size for the + * array of exponential average. + */ + u64 interval_us = (interval >> 10) / PREDICTION_FACTOR; + + return likely(interval_us) ? ilog2(interval_us) : 0; +} + +static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, + u64 interval) +{ + int index; + + /* + * Get the index in the ema table for this interrupt. + */ + index = irq_timings_interval_index(interval); + + if (index > PREDICTION_BUFFER_SIZE - 1) { + irqs->count = 0; + return; + } + + /* + * Store the index as an element of the pattern in another + * circular array. + */ + irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; + + irqs->ema_time[index] = irq_timings_ema_new(interval, + irqs->ema_time[index]); + + irqs->count++; +} + +static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) +{ + u64 old_ts = irqs->last_ts; + u64 interval; + + /* + * The timestamps are absolute time values, we need to compute + * the timing interval between two interrupts. + */ + irqs->last_ts = ts; + + /* + * The interval type is u64 in order to deal with the same + * type in our computation, that prevent mindfuck issues with + * overflow, sign and division. + */ + interval = ts - old_ts; + + /* + * The interrupt triggered more than one second apart, that + * ends the sequence as predictable for our purpose. In this + * case, assume we have the beginning of a sequence and the + * timestamp is the first value. As it is impossible to + * predict anything at this point, return. + * + * Note the first timestamp of the sequence will always fall + * in this test because the old_ts is zero. That is what we + * want as we need another timestamp to compute an interval. + */ + if (interval >= NSEC_PER_SEC) { + irqs->count = 0; + return; + } + + __irq_timings_store(irq, irqs, interval); +} + +/** + * irq_timings_next_event - Return when the next event is supposed to arrive + * + * During the last busy cycle, the number of interrupts is incremented + * and stored in the irq_timings structure. This information is + * necessary to: + * + * - know if the index in the table wrapped up: + * + * If more than the array size interrupts happened during the + * last busy/idle cycle, the index wrapped up and we have to + * begin with the next element in the array which is the last one + * in the sequence, otherwise it is at the index 0. + * + * - have an indication of the interrupts activity on this CPU + * (eg. irq/sec) + * + * The values are 'consumed' after inserting in the statistical model, + * thus the count is reinitialized. + * + * The array of values **must** be browsed in the time direction, the + * timestamp must increase between an element and the next one. + * + * Returns a nanosec time based estimation of the earliest interrupt, + * U64_MAX otherwise. + */ +u64 irq_timings_next_event(u64 now) +{ + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + struct irqt_stat *irqs; + struct irqt_stat __percpu *s; + u64 ts, next_evt = U64_MAX; + int i, irq = 0; + + /* + * This function must be called with the local irq disabled in + * order to prevent the timings circular buffer to be updated + * while we are reading it. + */ + lockdep_assert_irqs_disabled(); + + if (!irqts->count) + return next_evt; + + /* + * Number of elements in the circular buffer: If it happens it + * was flushed before, then the number of elements could be + * smaller than IRQ_TIMINGS_SIZE, so the count is used, + * otherwise the array size is used as we wrapped. The index + * begins from zero when we did not wrap. That could be done + * in a nicer way with the proper circular array structure + * type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + * + * Inject measured irq/timestamp to the pattern prediction + * model while decrementing the counter because we consume the + * data from our circular buffer. + */ + for_each_irqts(i, irqts) { + irq = irq_timing_decode(irqts->values[i], &ts); + s = idr_find(&irqt_stats, irq); + if (s) + irq_timings_store(irq, this_cpu_ptr(s), ts); + } + + /* + * Look in the list of interrupts' statistics, the earliest + * next event. + */ + idr_for_each_entry(&irqt_stats, s, i) { + + irqs = this_cpu_ptr(s); + + ts = __irq_timings_next_event(irqs, i, now); + if (ts <= now) + return now; + + if (ts < next_evt) + next_evt = ts; + } + + return next_evt; +} + +void irq_timings_free(int irq) +{ + struct irqt_stat __percpu *s; + + s = idr_find(&irqt_stats, irq); + if (s) { + free_percpu(s); + idr_remove(&irqt_stats, irq); + } +} + +int irq_timings_alloc(int irq) +{ + struct irqt_stat __percpu *s; + int id; + + /* + * Some platforms can have the same private interrupt per cpu, + * so this function may be called several times with the + * same interrupt number. Just bail out in case the per cpu + * stat structure is already allocated. + */ + s = idr_find(&irqt_stats, irq); + if (s) + return 0; + + s = alloc_percpu(*s); + if (!s) + return -ENOMEM; + + idr_preload(GFP_KERNEL); + id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); + idr_preload_end(); + + if (id < 0) { + free_percpu(s); + return id; + } + + return 0; +} + +#ifdef CONFIG_TEST_IRQ_TIMINGS +struct timings_intervals { + u64 *intervals; + size_t count; +}; + +/* + * Intervals are given in nanosecond base + */ +static u64 intervals0[] __initdata = { + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, +}; + +static u64 intervals1[] __initdata = { + 223947000, 1240000, 1384000, 1386000, 1386000, + 217416000, 1236000, 1384000, 1386000, 1387000, + 214719000, 1241000, 1386000, 1387000, 1384000, + 213696000, 1234000, 1384000, 1386000, 1388000, + 219904000, 1240000, 1385000, 1389000, 1385000, + 212240000, 1240000, 1386000, 1386000, 1386000, + 214415000, 1236000, 1384000, 1386000, 1387000, + 214276000, 1234000, +}; + +static u64 intervals2[] __initdata = { + 4000, 3000, 5000, 100000, + 3000, 3000, 5000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 5000, 3000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 4000, 5000, 112000, + 4000, +}; + +static u64 intervals3[] __initdata = { + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, +}; + +static u64 intervals4[] __initdata = { + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, +}; + +static struct timings_intervals tis[] __initdata = { + { intervals0, ARRAY_SIZE(intervals0) }, + { intervals1, ARRAY_SIZE(intervals1) }, + { intervals2, ARRAY_SIZE(intervals2) }, + { intervals3, ARRAY_SIZE(intervals3) }, + { intervals4, ARRAY_SIZE(intervals4) }, +}; + +static int __init irq_timings_test_next_index(struct timings_intervals *ti) +{ + int _buffer[IRQ_TIMINGS_SIZE]; + int buffer[IRQ_TIMINGS_SIZE]; + int index, start, i, count, period_max; + + count = ti->count - 1; + + period_max = count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : count / 3; + + /* + * Inject all values except the last one which will be used + * to compare with the next index result. + */ + pr_debug("index suite: "); + + for (i = 0; i < count; i++) { + index = irq_timings_interval_index(ti->intervals[i]); + _buffer[i & IRQ_TIMINGS_MASK] = index; + pr_cont("%d ", index); + } + + start = count < IRQ_TIMINGS_SIZE ? 0 : + count & IRQ_TIMINGS_MASK; + + count = min_t(int, count, IRQ_TIMINGS_SIZE); + + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + buffer[i] = _buffer[index]; + } + + index = irq_timings_next_event_index(buffer, count, period_max); + i = irq_timings_interval_index(ti->intervals[ti->count - 1]); + + if (index != i) { + pr_err("Expected (%d) and computed (%d) next indexes differ\n", + i, index); + return -EINVAL; + } + + return 0; +} + +static int __init irq_timings_next_index_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + + ret = irq_timings_test_next_index(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqs(struct timings_intervals *ti) +{ + struct irqt_stat __percpu *s; + struct irqt_stat *irqs; + int i, index, ret, irq = 0xACE5; + + ret = irq_timings_alloc(irq); + if (ret) { + pr_err("Failed to allocate irq timings\n"); + return ret; + } + + s = idr_find(&irqt_stats, irq); + if (!s) { + ret = -EIDRM; + goto out; + } + + irqs = this_cpu_ptr(s); + + for (i = 0; i < ti->count; i++) { + + index = irq_timings_interval_index(ti->intervals[i]); + pr_debug("%d: interval=%llu ema_index=%d\n", + i, ti->intervals[i], index); + + __irq_timings_store(irq, irqs, ti->intervals[i]); + if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { + ret = -EBADSLT; + pr_err("Failed to store in the circular buffer\n"); + goto out; + } + } + + if (irqs->count != ti->count) { + ret = -ERANGE; + pr_err("Count differs\n"); + goto out; + } + + ret = 0; +out: + irq_timings_free(irq); + + return ret; +} + +static int __init irq_timings_irqs_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + ret = irq_timings_test_irqs(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqts(struct irq_timings *irqts, + unsigned count) +{ + int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; + int i, irq, oirq = 0xBEEF; + u64 ots = 0xDEAD, ts; + + /* + * Fill the circular buffer by using the dedicated function. + */ + for (i = 0; i < count; i++) { + pr_debug("%d: index=%d, ts=%llX irq=%X\n", + i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); + + irq_timings_push(ots + i, oirq + i); + } + + /* + * Compute the first elements values after the index wrapped + * up or not. + */ + ots += start; + oirq += start; + + /* + * Test the circular buffer count is correct. + */ + pr_debug("---> Checking timings array count (%d) is right\n", count); + if (WARN_ON(irqts->count != count)) + return -EINVAL; + + /* + * Test the macro allowing to browse all the irqts. + */ + pr_debug("---> Checking the for_each_irqts() macro\n"); + for_each_irqts(i, irqts) { + + irq = irq_timing_decode(irqts->values[i], &ts); + + pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n", + i, ts, ots, irq, oirq); + + if (WARN_ON(ts != ots || irq != oirq)) + return -EINVAL; + + ots++; oirq++; + } + + /* + * The circular buffer should have be flushed when browsed + * with for_each_irqts + */ + pr_debug("---> Checking timings array is empty after browsing it\n"); + if (WARN_ON(irqts->count)) + return -EINVAL; + + return 0; +} + +static int __init irq_timings_irqts_selftest(void) +{ + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + int i, ret; + + /* + * Test the circular buffer with different number of + * elements. The purpose is to test at the limits (empty, half + * full, full, wrapped with the cursor at the boundaries, + * wrapped several times, etc ... + */ + int count[] = { 0, + IRQ_TIMINGS_SIZE >> 1, + IRQ_TIMINGS_SIZE, + IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), + 2 * IRQ_TIMINGS_SIZE, + (2 * IRQ_TIMINGS_SIZE) + 3, + }; + + for (i = 0; i < ARRAY_SIZE(count); i++) { + + pr_info("---> Checking the timings with %d/%d values\n", + count[i], IRQ_TIMINGS_SIZE); + + ret = irq_timings_test_irqts(irqts, count[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_selftest(void) +{ + int ret; + + pr_info("------------------- selftest start -----------------\n"); + + /* + * At this point, we don't except any subsystem to use the irq + * timings but us, so it should not be enabled. + */ + if (static_branch_unlikely(&irq_timing_enabled)) { + pr_warn("irq timings already initialized, skipping selftest\n"); + return 0; + } + + ret = irq_timings_irqts_selftest(); + if (ret) + goto out; + + ret = irq_timings_irqs_selftest(); + if (ret) + goto out; + + ret = irq_timings_next_index_selftest(); +out: + pr_info("---------- selftest end with %s -----------\n", + ret ? "failure" : "success"); + + return ret; +} +early_initcall(irq_timings_selftest); +#endif diff --git a/kernel/irq_work.c b/kernel/irq_work.c new file mode 100644 index 0000000000..2f4fb336dd --- /dev/null +++ b/kernel/irq_work.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra + * + * Provides a framework for enqueueing and running callbacks from hardirq + * context. The enqueueing is NMI-safe. + */ + +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/irq_work.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <linux/irqflags.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/cpu.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <linux/smpboot.h> +#include <asm/processor.h> +#include <linux/kasan.h> + +#include <trace/events/ipi.h> + +static DEFINE_PER_CPU(struct llist_head, raised_list); +static DEFINE_PER_CPU(struct llist_head, lazy_list); +static DEFINE_PER_CPU(struct task_struct *, irq_workd); + +static void wake_irq_workd(void) +{ + struct task_struct *tsk = __this_cpu_read(irq_workd); + + if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk) + wake_up_process(tsk); +} + +#ifdef CONFIG_SMP +static void irq_work_wake(struct irq_work *entry) +{ + wake_irq_workd(); +} + +static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) = + IRQ_WORK_INIT_HARD(irq_work_wake); +#endif + +static int irq_workd_should_run(unsigned int cpu) +{ + return !llist_empty(this_cpu_ptr(&lazy_list)); +} + +/* + * Claim the entry so that no one else will poke at it. + */ +static bool irq_work_claim(struct irq_work *work) +{ + int oflags; + + oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags); + /* + * If the work is already pending, no need to raise the IPI. + * The pairing smp_mb() in irq_work_single() makes sure + * everything we did before is visible. + */ + if (oflags & IRQ_WORK_PENDING) + return false; + return true; +} + +void __weak arch_irq_work_raise(void) +{ + /* + * Lame architectures will get the timer tick callback + */ +} + +static __always_inline void irq_work_raise(struct irq_work *work) +{ + if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt()) + trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func); + + arch_irq_work_raise(); +} + +/* Enqueue on current CPU, work must already be claimed and preempt disabled */ +static void __irq_work_queue_local(struct irq_work *work) +{ + struct llist_head *list; + bool rt_lazy_work = false; + bool lazy_work = false; + int work_flags; + + work_flags = atomic_read(&work->node.a_flags); + if (work_flags & IRQ_WORK_LAZY) + lazy_work = true; + else if (IS_ENABLED(CONFIG_PREEMPT_RT) && + !(work_flags & IRQ_WORK_HARD_IRQ)) + rt_lazy_work = true; + + if (lazy_work || rt_lazy_work) + list = this_cpu_ptr(&lazy_list); + else + list = this_cpu_ptr(&raised_list); + + if (!llist_add(&work->node.llist, list)) + return; + + /* If the work is "lazy", handle it from next tick if any */ + if (!lazy_work || tick_nohz_tick_stopped()) + irq_work_raise(work); +} + +/* Enqueue the irq work @work on the current CPU */ +bool irq_work_queue(struct irq_work *work) +{ + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return false; + + /* Queue the entry and raise the IPI if needed. */ + preempt_disable(); + __irq_work_queue_local(work); + preempt_enable(); + + return true; +} +EXPORT_SYMBOL_GPL(irq_work_queue); + +/* + * Enqueue the irq_work @work on @cpu unless it's already pending + * somewhere. + * + * Can be re-enqueued while the callback is still in progress. + */ +bool irq_work_queue_on(struct irq_work *work, int cpu) +{ +#ifndef CONFIG_SMP + return irq_work_queue(work); + +#else /* CONFIG_SMP: */ + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(cpu)); + + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return false; + + kasan_record_aux_stack_noalloc(work); + + preempt_disable(); + if (cpu != smp_processor_id()) { + /* Arch remote IPI send/receive backend aren't NMI safe */ + WARN_ON_ONCE(in_nmi()); + + /* + * On PREEMPT_RT the items which are not marked as + * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work + * item is used on the remote CPU to wake the thread. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && + !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { + + if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) + goto out; + + work = &per_cpu(irq_work_wakeup, cpu); + if (!irq_work_claim(work)) + goto out; + } + + __smp_call_single_queue(cpu, &work->node.llist); + } else { + __irq_work_queue_local(work); + } +out: + preempt_enable(); + + return true; +#endif /* CONFIG_SMP */ +} + +bool irq_work_needs_cpu(void) +{ + struct llist_head *raised, *lazy; + + raised = this_cpu_ptr(&raised_list); + lazy = this_cpu_ptr(&lazy_list); + + if (llist_empty(raised) || arch_irq_work_has_interrupt()) + if (llist_empty(lazy)) + return false; + + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); + + return true; +} + +void irq_work_single(void *arg) +{ + struct irq_work *work = arg; + int flags; + + /* + * Clear the PENDING bit, after this point the @work can be re-used. + * The PENDING bit acts as a lock, and we own it, so we can clear it + * without atomic ops. + */ + flags = atomic_read(&work->node.a_flags); + flags &= ~IRQ_WORK_PENDING; + atomic_set(&work->node.a_flags, flags); + + /* + * See irq_work_claim(). + */ + smp_mb(); + + lockdep_irq_work_enter(flags); + work->func(work); + lockdep_irq_work_exit(flags); + + /* + * Clear the BUSY bit, if set, and return to the free state if no-one + * else claimed it meanwhile. + */ + (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); + + if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || + !arch_irq_work_has_interrupt()) + rcuwait_wake_up(&work->irqwait); +} + +static void irq_work_run_list(struct llist_head *list) +{ + struct irq_work *work, *tmp; + struct llist_node *llnode; + + /* + * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed + * in a per-CPU thread in preemptible context. Only the items which are + * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context. + */ + BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT)); + + if (llist_empty(list)) + return; + + llnode = llist_del_all(list); + llist_for_each_entry_safe(work, tmp, llnode, node.llist) + irq_work_single(work); +} + +/* + * hotplug calls this through: + * hotplug_cfd() -> flush_smp_call_function_queue() + */ +void irq_work_run(void) +{ + irq_work_run_list(this_cpu_ptr(&raised_list)); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + irq_work_run_list(this_cpu_ptr(&lazy_list)); + else + wake_irq_workd(); +} +EXPORT_SYMBOL_GPL(irq_work_run); + +void irq_work_tick(void) +{ + struct llist_head *raised = this_cpu_ptr(&raised_list); + + if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) + irq_work_run_list(raised); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + irq_work_run_list(this_cpu_ptr(&lazy_list)); + else + wake_irq_workd(); +} + +/* + * Synchronize against the irq_work @entry, ensures the entry is not + * currently in use. + */ +void irq_work_sync(struct irq_work *work) +{ + lockdep_assert_irqs_enabled(); + might_sleep(); + + if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || + !arch_irq_work_has_interrupt()) { + rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), + TASK_UNINTERRUPTIBLE); + return; + } + + while (irq_work_is_busy(work)) + cpu_relax(); +} +EXPORT_SYMBOL_GPL(irq_work_sync); + +static void run_irq_workd(unsigned int cpu) +{ + irq_work_run_list(this_cpu_ptr(&lazy_list)); +} + +static void irq_workd_setup(unsigned int cpu) +{ + sched_set_fifo_low(current); +} + +static struct smp_hotplug_thread irqwork_threads = { + .store = &irq_workd, + .setup = irq_workd_setup, + .thread_should_run = irq_workd_should_run, + .thread_fn = run_irq_workd, + .thread_comm = "irq_work/%u", +}; + +static __init int irq_work_init_threads(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + BUG_ON(smpboot_register_percpu_thread(&irqwork_threads)); + return 0; +} +early_initcall(irq_work_init_threads); |