diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 10:10:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 10:10:18 +0000 |
commit | c6fca85f2153de8b2451269de43abcc2aa0213ab (patch) | |
tree | ba8548f2df113c7da8a46563947536f455f9a1c0 | |
parent | Adding debian version 6.1.76-1. (diff) | |
download | linux-c6fca85f2153de8b2451269de43abcc2aa0213ab.tar.xz linux-c6fca85f2153de8b2451269de43abcc2aa0213ab.zip |
Merging upstream version 6.1.82.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
991 files changed, 12200 insertions, 9561 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 978b76358..40d5aab84 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus +What: /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt +What: /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout +What: /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate +What: /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus +What: /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible vaules depend on the number of available CPU(s) in the system. -What: /sys/class/<iface>/queues/tx-<queue>/xps_rxqs +What: /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db27815..53e508c69 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class/<iface>/statistics/collisions +What: /sys/class/net/<iface>/statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class/<iface>/statistics/multicast +What: /sys/class/net/<iface>/statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class/<iface>/statistics/rx_bytes +What: /sys/class/net/<iface>/statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class/<iface>/statistics/rx_compressed +What: /sys/class/net/<iface>/statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class/<iface>/statistics/rx_crc_errors +What: /sys/class/net/<iface>/statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. -What: /sys/class/<iface>/statistics/rx_dropped +What: /sys/class/net/<iface>/statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_errors +What: /sys/class/net/<iface>/statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_fifo_errors +What: /sys/class/net/<iface>/statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_frame_errors +What: /sys/class/net/<iface>/statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_length_errors +What: /sys/class/net/<iface>/statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_missed_errors +What: /sys/class/net/<iface>/statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_nohandler +What: /sys/class/net/<iface>/statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class/<iface>/statistics/rx_over_errors +What: /sys/class/net/<iface>/statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/rx_packets +What: /sys/class/net/<iface>/statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class/<iface>/statistics/tx_aborted_errors +What: /sys/class/net/<iface>/statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/tx_bytes +What: /sys/class/net/<iface>/statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class/<iface>/statistics/tx_carrier_errors +What: /sys/class/net/<iface>/statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class/<iface>/statistics/tx_compressed +What: /sys/class/net/<iface>/statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). -What: /sys/class/<iface>/statistics/tx_dropped +What: /sys/class/net/<iface>/statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class/<iface>/statistics/tx_errors +What: /sys/class/net/<iface>/statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class/<iface>/statistics/tx_fifo_errors +What: /sys/class/net/<iface>/statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class/<iface>/statistics/tx_heartbeat_errors +What: /sys/class/net/<iface>/statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class/<iface>/statistics/tx_packets +What: /sys/class/net/<iface>/statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class/<iface>/statistics/tx_window_errors +What: /sys/class/net/<iface>/statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 13c01b641..78c26280c 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -519,6 +519,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/mds /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling /sys/devices/system/cpu/vulnerabilities/retbleed /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/spectre_v1 diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 6828102ba..3e4a14e38 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -21,3 +21,4 @@ are configurable at compile, boot or run time. cross-thread-rsb.rst gather_data_sampling.rst srso + reg-file-data-sampling diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst new file mode 100644 index 000000000..0585d02b9 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -0,0 +1,104 @@ +================================== +Register File Data Sampling (RFDS) +================================== + +Register File Data Sampling (RFDS) is a microarchitectural vulnerability that +only affects Intel Atom parts(also branded as E-cores). RFDS may allow +a malicious actor to infer data values previously used in floating point +registers, vector registers, or integer registers. RFDS does not provide the +ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS. + +Affected Processors +=================== +Below is the list of affected Intel processors [#f1]_: + + =================== ============ + Common name Family_Model + =================== ============ + ATOM_GOLDMONT 06_5CH + ATOM_GOLDMONT_D 06_5FH + ATOM_GOLDMONT_PLUS 06_7AH + ATOM_TREMONT_D 06_86H + ATOM_TREMONT 06_96H + ALDERLAKE 06_97H + ALDERLAKE_L 06_9AH + ATOM_TREMONT_L 06_9CH + RAPTORLAKE 06_B7H + RAPTORLAKE_P 06_BAH + ATOM_GRACEMONT 06_BEH + RAPTORLAKE_S 06_BFH + =================== ============ + +As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and +RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as +vulnerable in Linux because they share the same family/model with an affected +part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or +CPUID.HYBRID. This information could be used to distinguish between the +affected and unaffected parts, but it is deemed not worth adding complexity as +the reporting is fixed automatically when these parts enumerate RFDS_NO. + +Mitigation +========== +Intel released a microcode update that enables software to clear sensitive +information using the VERW instruction. Like MDS, RFDS deploys the same +mitigation strategy to force the CPU to clear the affected buffers before an +attacker can extract the secrets. This is achieved by using the otherwise +unused and obsolete VERW instruction in combination with a microcode update. +The microcode clears the affected CPU buffers when the VERW instruction is +executed. + +Mitigation points +----------------- +VERW is executed by the kernel before returning to user space, and by KVM +before VMentry. None of the affected cores support SMT, so VERW is not required +at C-state transitions. + +New bits in IA32_ARCH_CAPABILITIES +---------------------------------- +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +vulnerability and mitigation capability: + +- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS. +- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the + microcode that clears the affected buffers on VERW execution. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control RFDS mitigation at boot time with the +parameter "reg_file_data_sampling=". The valid arguments are: + + ========== ================================================================= + on If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and before entering a VM. + off Disables mitigation. + ========== ================================================================= + +Mitigation default is selected by CONFIG_MITIGATION_RFDS. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: No microcode' + - The processor is vulnerable but microcode is not updated. + * - 'Mitigation: Clear Register File' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4ad60e127..2dfe75104 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1107,6 +1107,26 @@ The filter can be disabled or changed to another driver later using sysfs. + reg_file_data_sampling= + [X86] Controls mitigation for Register File Data + Sampling (RFDS) vulnerability. RFDS is a CPU + vulnerability which may allow userspace to infer + kernel data values previously stored in floating point + registers, vector registers, or integer registers. + RFDS only affects Intel Atom processors. + + on: Turns ON the mitigation. + off: Turns OFF the mitigation. + + This parameter overrides the compile time default set + by CONFIG_MITIGATION_RFDS. Mitigation cannot be + disabled when other VERW based mitigations (like MDS) + are enabled. In order to disable RFDS mitigation all + VERW based mitigations need to be disabled. + + For details see: + Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst + driver_async_probe= [KNL] List of driver names to be probed asynchronously. * matches with all driver names. If * is specified, the @@ -3262,6 +3282,7 @@ nospectre_bhb [ARM64] nospectre_v1 [X86,PPC] nospectre_v2 [X86,PPC,S390,ARM64] + reg_file_data_sampling=off [X86] retbleed=off [X86] spec_store_bypass_disable=off [X86,PPC] spectre_v2_user=off [X86] diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d9fce65b2..27135b9c0 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -221,3 +221,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt index 9bf9bbac1..cdc303caf 100644 --- a/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt +++ b/Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt @@ -1,4 +1,4 @@ -Marvell 8787/8897/8997 (sd8787/sd8897/sd8997/pcie8997) SDIO/PCIE devices +Marvell 8787/8897/8978/8997 (sd8787/sd8897/sd8978/sd8997/pcie8997) SDIO/PCIE devices ------ This node provides properties for controlling the Marvell SDIO/PCIE wireless device. @@ -10,7 +10,9 @@ Required properties: - compatible : should be one of the following: * "marvell,sd8787" * "marvell,sd8897" + * "marvell,sd8978" * "marvell,sd8997" + * "nxp,iw416" * "pci11ab,2b42" * "pci1b4b,2b42" diff --git a/Documentation/sound/soc/dapm.rst b/Documentation/sound/soc/dapm.rst index 8e4410793..c3154ce6e 100644 --- a/Documentation/sound/soc/dapm.rst +++ b/Documentation/sound/soc/dapm.rst @@ -234,7 +234,7 @@ corresponding soft power control. In this case it is necessary to create a virtual widget - a widget with no control bits e.g. :: - SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_DAPM_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), This can be used to merge to signal paths together in software. diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index 894a19897..bac3789f3 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -1416,7 +1416,7 @@ execution context provided by the EFI firmware. The function prototype for the handover entry point looks like this:: - efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp) + efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp) 'handle' is the EFI image handle passed to the boot loader by the EFI firmware, 'table' is the EFI system table - these are the first two diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 5d4330be2..e801df0bb 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/MAINTAINERS b/MAINTAINERS index 13d107880..bbfedb0b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10051,6 +10051,7 @@ F: drivers/infiniband/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: include/trace/misc/rdma.h F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c @@ -11139,6 +11140,12 @@ F: fs/nfs_common/ F: fs/nfsd/ F: include/linux/lockd/ F: include/linux/sunrpc/ +F: include/trace/events/rpcgss.h +F: include/trace/events/rpcrdma.h +F: include/trace/events/sunrpc.h +F: include/trace/misc/fs.h +F: include/trace/misc/nfs.h +F: include/trace/misc/sunrpc.h F: include/uapi/linux/nfsd/ F: include/uapi/linux/sunrpc/ F: net/sunrpc/ @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 76 +SUBLEVEL = 82 EXTRAVERSION = NAME = Curry Ramen @@ -459,8 +459,7 @@ HOSTRUSTC = rustc HOSTPKG_CONFIG = pkg-config KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu11 \ - -Wdeclaration-after-statement + -O2 -fomit-frame-pointer -std=gnu11 KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) KBUILD_USERLDFLAGS := $(USERLDFLAGS) @@ -1018,9 +1017,6 @@ endif # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -# warn about C99 declaration after statement -KBUILD_CFLAGS += -Wdeclaration-after-statement - # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla diff --git a/arch/Kconfig b/arch/Kconfig index 14273a620..f99fd9a4c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -642,6 +642,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d9618079..a339223d9 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/boot/dts/imx1-ads.dts b/arch/arm/boot/dts/imx1-ads.dts index 5833fb6f1..2c817c4a4 100644 --- a/arch/arm/boot/dts/imx1-ads.dts +++ b/arch/arm/boot/dts/imx1-ads.dts @@ -65,7 +65,7 @@ pinctrl-0 = <&pinctrl_weim>; status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <4>; diff --git a/arch/arm/boot/dts/imx1-apf9328.dts b/arch/arm/boot/dts/imx1-apf9328.dts index 77b21aa7a..27e72b07b 100644 --- a/arch/arm/boot/dts/imx1-apf9328.dts +++ b/arch/arm/boot/dts/imx1-apf9328.dts @@ -45,7 +45,7 @@ pinctrl-0 = <&pinctrl_weim>; status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <2>; diff --git a/arch/arm/boot/dts/imx1.dtsi b/arch/arm/boot/dts/imx1.dtsi index e312f1e74..4aeb74479 100644 --- a/arch/arm/boot/dts/imx1.dtsi +++ b/arch/arm/boot/dts/imx1.dtsi @@ -268,9 +268,12 @@ status = "disabled"; }; - esram: esram@300000 { + esram: sram@300000 { compatible = "mmio-sram"; reg = <0x00300000 0x20000>; + ranges = <0 0x00300000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; }; }; }; diff --git a/arch/arm/boot/dts/imx23-sansa.dts b/arch/arm/boot/dts/imx23-sansa.dts index 46057d9bf..c2efcc20a 100644 --- a/arch/arm/boot/dts/imx23-sansa.dts +++ b/arch/arm/boot/dts/imx23-sansa.dts @@ -175,10 +175,8 @@ #address-cells = <1>; #size-cells = <0>; compatible = "i2c-gpio"; - gpios = < - &gpio1 24 0 /* SDA */ - &gpio1 22 0 /* SCL */ - >; + sda-gpios = <&gpio1 24 0>; + scl-gpios = <&gpio1 22 0>; i2c-gpio,delay-us = <2>; /* ~100 kHz */ }; @@ -186,10 +184,8 @@ #address-cells = <1>; #size-cells = <0>; compatible = "i2c-gpio"; - gpios = < - &gpio0 31 0 /* SDA */ - &gpio0 30 0 /* SCL */ - >; + sda-gpios = <&gpio0 31 0>; + scl-gpios = <&gpio0 30 0>; i2c-gpio,delay-us = <2>; /* ~100 kHz */ touch: touch@20 { diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index 7f4c60245..b236d23f8 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -59,7 +59,7 @@ reg = <0x80000000 0x2000>; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx23-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <0 14 20 0 @@ -414,7 +414,7 @@ status = "disabled"; }; - dma_apbx: dma-apbx@80024000 { + dma_apbx: dma-controller@80024000 { compatible = "fsl,imx23-dma-apbx"; reg = <0x80024000 0x2000>; interrupts = <7 5 9 26 diff --git a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi index 0703f62d1..93a6e4e68 100644 --- a/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi +++ b/arch/arm/boot/dts/imx25-eukrea-cpuimx25.dtsi @@ -27,7 +27,7 @@ pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; - pcf8563@51 { + rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts index 7d4301b22..1ed3fb7b9 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-cmo-qvga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&qvga_timings>; - qvga_timings: 320x240 { + qvga_timings: timing0 { clock-frequency = <6500000>; hactive = <320>; vactive = <240>; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts index 80a7f96de..64b2ffac4 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-svga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&dvi_svga_timings>; - dvi_svga_timings: 800x600 { + dvi_svga_timings: timing0 { clock-frequency = <40000000>; hactive = <800>; vactive = <600>; diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts index 24027a1fb..fb074bfda 100644 --- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts +++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dts @@ -16,7 +16,7 @@ bus-width = <18>; display-timings { native-mode = <&dvi_vga_timings>; - dvi_vga_timings: 640x480 { + dvi_vga_timings: timing0 { clock-frequency = <31250000>; hactive = <640>; vactive = <480>; diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index fb66884d8..59b40d13a 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -78,7 +78,7 @@ bus-width = <18>; display-timings { native-mode = <&wvga_timings>; - wvga_timings: 640x480 { + wvga_timings: timing0 { hactive = <640>; vactive = <480>; hback-porch = <45>; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 5f90d72b8..5ac454928 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -543,7 +543,7 @@ }; iim: efuse@53ff0000 { - compatible = "fsl,imx25-iim", "fsl,imx27-iim"; + compatible = "fsl,imx25-iim"; reg = <0x53ff0000 0x4000>; interrupts = <19>; clocks = <&clks 99>; diff --git a/arch/arm/boot/dts/imx27-apf27dev.dts b/arch/arm/boot/dts/imx27-apf27dev.dts index 6f1e8ce9e..3d9bb7fc3 100644 --- a/arch/arm/boot/dts/imx27-apf27dev.dts +++ b/arch/arm/boot/dts/imx27-apf27dev.dts @@ -16,7 +16,7 @@ fsl,pcr = <0xfae80083>; /* non-standard but required */ display-timings { native-mode = <&timing0>; - timing0: 800x480 { + timing0: timing0 { clock-frequency = <33000033>; hactive = <800>; vactive = <480>; @@ -47,7 +47,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpio_leds>; - user { + led-user { label = "Heartbeat"; gpios = <&gpio6 14 GPIO_ACTIVE_HIGH>; linux,default-trigger = "heartbeat"; diff --git a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi index 74110bbcd..c7e923584 100644 --- a/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi +++ b/arch/arm/boot/dts/imx27-eukrea-cpuimx27.dtsi @@ -33,7 +33,7 @@ pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; - pcf8563@51 { + rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; }; @@ -90,7 +90,7 @@ &weim { status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { #address-cells = <1>; #size-cells = <1>; compatible = "cfi-flash"; diff --git a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts index 9c3ec82ec..50fa0bd4c 100644 --- a/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts +++ b/arch/arm/boot/dts/imx27-eukrea-mbimxsd27-baseboard.dts @@ -16,7 +16,7 @@ display-timings { native-mode = <&timing0>; - timing0: 320x240 { + timing0: timing0 { clock-frequency = <6500000>; hactive = <320>; vactive = <240>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts index 188639738..7f36af150 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts @@ -19,7 +19,7 @@ fsl,pcr = <0xf0c88080>; /* non-standard but required */ display-timings { native-mode = <&timing0>; - timing0: 640x480 { + timing0: timing0 { hactive = <640>; vactive = <480>; hback-porch = <112>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts index 344e77790..d133b9f08 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycore-rdk.dts @@ -19,7 +19,7 @@ display-timings { native-mode = <&timing0>; - timing0: 240x320 { + timing0: timing0 { clock-frequency = <5500000>; hactive = <240>; vactive = <320>; diff --git a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi index 3d1027317..a5fdc2fd4 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx27-phytec-phycore-som.dtsi @@ -322,7 +322,7 @@ &weim { status = "okay"; - nor: nor@0,0 { + nor: flash@0,0 { compatible = "cfi-flash"; reg = <0 0x00000000 0x02000000>; bank-width = <2>; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index e140307be..eb6daf224 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -588,6 +588,9 @@ iram: sram@ffff4c00 { compatible = "mmio-sram"; reg = <0xffff4c00 0xb400>; + ranges = <0 0xffff4c00 0xb400>; + #address-cells = <1>; + #size-cells = <1>; }; }; }; diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi index 130b4145a..b81592a61 100644 --- a/arch/arm/boot/dts/imx28.dtsi +++ b/arch/arm/boot/dts/imx28.dtsi @@ -78,7 +78,7 @@ status = "disabled"; }; - dma_apbh: dma-apbh@80004000 { + dma_apbh: dma-controller@80004000 { compatible = "fsl,imx28-dma-apbh"; reg = <0x80004000 0x2000>; interrupts = <82 83 84 85 @@ -994,7 +994,7 @@ status = "disabled"; }; - dma_apbx: dma-apbx@80024000 { + dma_apbx: dma-controller@80024000 { compatible = "fsl,imx28-dma-apbx"; reg = <0x80024000 0x2000>; interrupts = <78 79 66 0 diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts index f9f7d99bd..76f3e07bc 100644 --- a/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts +++ b/arch/arm/boot/dts/imx6q-apalis-ixora-v1.2.dts @@ -76,6 +76,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can1_power>; regulator-name = "can1_supply"; + startup-delay-us = <1000>; }; reg_can2_supply: regulator-can2-supply { @@ -85,6 +86,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enable_can2_power>; regulator-name = "can2_supply"; + startup-delay-us = <1000>; }; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index ff1e0173b..2c6eada01 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -150,7 +150,7 @@ interrupt-parent = <&gpc>; ranges; - dma_apbh: dma-apbh@110000 { + dma_apbh: dma-controller@110000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x00110000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 1f1053a89..67d344ae7 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -209,7 +209,7 @@ power-domains = <&pd_pu>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6sx-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index 2b5996395..aac081b6d 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -164,7 +164,7 @@ <0x00a06000 0x2000>; }; - dma_apbh: dma-apbh@1804000 { + dma_apbh: dma-controller@1804000 { compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x01804000 0x2000>; interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 7ceb7c09f..7ef685fdd 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -208,9 +208,6 @@ }; &ca_funnel_in_ports { - #address-cells = <1>; - #size-cells = <0>; - port@1 { reg = <1>; ca_funnel_in_port1: endpoint { diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 459477071..69aebc691 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -190,7 +190,11 @@ clock-names = "apb_pclk"; ca_funnel_in_ports: in-ports { - port { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; ca_funnel_in_port0: endpoint { remote-endpoint = <&etm0_out_port>; }; @@ -814,7 +818,7 @@ }; lcdif: lcdif@30730000 { - compatible = "fsl,imx7d-lcdif", "fsl,imx28-lcdif"; + compatible = "fsl,imx7d-lcdif", "fsl,imx6sx-lcdif"; reg = <0x30730000 0x10000>; interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>, @@ -1263,14 +1267,13 @@ }; }; - dma_apbh: dma-apbh@33000000 { + dma_apbh: dma-controller@33000000 { compatible = "fsl,imx7d-dma-apbh", "fsl,imx28-dma-apbh"; reg = <0x33000000 0x2000>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3"; #dma-cells = <1>; dma-channels = <4>; clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; @@ -1279,7 +1282,7 @@ gpmi: nand-controller@33002000{ compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 78686fc72..c420c7c64 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -402,12 +402,20 @@ pinctrl-0 = <&hdmi_ctl>; status = "disabled"; - hdmi_in: port { + ports { #address-cells = <1>; #size-cells = <0>; - hdmi_in_vop: endpoint@0 { + + hdmi_in: port@0 { reg = <0>; - remote-endpoint = <&vop_out_hdmi>; + + hdmi_in_vop: endpoint { + remote-endpoint = <&vop_out_hdmi>; + }; + }; + + hdmi_out: port@1 { + reg = <1>; }; }; }; diff --git a/arch/arm/include/asm/irq_work.h b/arch/arm/include/asm/irq_work.h index 3149e4dc1..889599983 100644 --- a/arch/arm/include/asm/irq_work.h +++ b/arch/arm/include/asm/irq_work.h @@ -9,6 +9,4 @@ static inline bool arch_irq_work_has_interrupt(void) return is_smp(); } -extern void arch_irq_work_raise(void); - #endif /* _ASM_ARM_IRQ_WORK_H */ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096..e4eb54f6c 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 95e731676..961daac65 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; diff --git a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts index 8ffbcb2b1..bbd3c05cb 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts +++ b/arch/arm64/boot/dts/amlogic/meson-s4-s805x2-aq222.dts @@ -15,7 +15,7 @@ #size-cells = <2>; aliases { - serial0 = &uart_B; + serial0 = &uart_b; }; memory@0 { @@ -25,6 +25,6 @@ }; -&uart_B { +&uart_b { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi index ad50cba42..372a03762 100644 --- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi @@ -118,14 +118,14 @@ <10 11 12 13 14 15 16 17 18 19 20 21>; }; - uart_B: serial@7a000 { + uart_b: serial@7a000 { compatible = "amlogic,meson-s4-uart", "amlogic,meson-ao-uart"; reg = <0x0 0x7a000 0x0 0x18>; interrupts = <GIC_SPI 169 IRQ_TYPE_EDGE_RISING>; - status = "disabled"; clocks = <&xtal>, <&xtal>, <&xtal>; clock-names = "xtal", "pclk", "baud"; + status = "disabled"; }; reset: reset-controller@2000 { diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts index 9d116e1fb..1ac4f8c24 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dts +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dts @@ -169,10 +169,6 @@ }; }; -&blsp_dma { - status = "okay"; -}; - &blsp_i2c2 { /* On Low speed expansion */ status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index bafac2cf7..987cebbda 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1522,7 +1522,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <0>; - status = "disabled"; + qcom,controlled-remotely; }; blsp1_uart1: serial@78af000 { diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 9de2248a3..789121171 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -390,6 +390,19 @@ reg = <0x0 0x80000000 0x0 0x0>; }; + etm { + compatible = "qcom,coresight-remote-etm"; + + out-ports { + port { + modem_etm_out_funnel_in2: endpoint { + remote-endpoint = + <&funnel_in2_in_modem_etm>; + }; + }; + }; + }; + psci { compatible = "arm,psci-1.0"; method = "smc"; @@ -2565,6 +2578,14 @@ clocks = <&rpmcc RPM_QDSS_CLK>, <&rpmcc RPM_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; + in-ports { + port { + funnel_in2_in_modem_etm: endpoint { + remote-endpoint = + <&modem_etm_out_funnel_in2>; + }; + }; + }; out-ports { port { diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index b00b8164c..7a4125053 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -1903,9 +1903,11 @@ cpu = <&CPU4>; - port{ - etm4_out: endpoint { - remote-endpoint = <&apss_funnel_in4>; + out-ports { + port{ + etm4_out: endpoint { + remote-endpoint = <&apss_funnel_in4>; + }; }; }; }; @@ -1920,9 +1922,11 @@ cpu = <&CPU5>; - port{ - etm5_out: endpoint { - remote-endpoint = <&apss_funnel_in5>; + out-ports { + port{ + etm5_out: endpoint { + remote-endpoint = <&apss_funnel_in5>; + }; }; }; }; @@ -1937,9 +1941,11 @@ cpu = <&CPU6>; - port{ - etm6_out: endpoint { - remote-endpoint = <&apss_funnel_in6>; + out-ports { + port{ + etm6_out: endpoint { + remote-endpoint = <&apss_funnel_in6>; + }; }; }; }; @@ -1954,9 +1960,11 @@ cpu = <&CPU7>; - port{ - etm7_out: endpoint { - remote-endpoint = <&apss_funnel_in7>; + out-ports { + port{ + etm7_out: endpoint { + remote-endpoint = <&apss_funnel_in7>; + }; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 4d5905ef0..95c515da9 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4049,7 +4049,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -4100,7 +4100,7 @@ assigned-clock-rates = <19200000>, <150000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc_intc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc_intc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc_intc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 8efd0e227..eb1a93699 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3629,7 +3629,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, <&pdc 8 IRQ_TYPE_EDGE_BOTH>, <&pdc 9 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", @@ -3678,7 +3678,7 @@ assigned-clock-rates = <19200000>, <200000000>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 487 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index bfa358042..61f018644 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -607,6 +607,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -622,6 +623,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bac4cabef..467ac2f76 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -227,8 +227,19 @@ static int ctr_encrypt(struct skcipher_request *req) src += blocks * AES_BLOCK_SIZE; } if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, nbytes, walk.iv); + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); + nbytes = 0; } kernel_neon_end(); diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 3622e9f4f..51738c56e 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_feature_likely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : : [feature] "i" (feature) @@ -247,7 +247,7 @@ alternative_has_feature_unlikely(unsigned long feature) compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) : : [feature] "i" (feature) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7dce9c0aa..af3a678a7 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -128,6 +129,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -179,6 +182,7 @@ #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 62c846be2..a75c0772e 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -103,6 +103,7 @@ static inline void free_screen_info(struct screen_info *si) } #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_ALLOC_LIMIT ((1UL << 48) - 1) /* * On ARM systems, virtually remapped UEFI runtime services are set up in two diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d720b6f7e..da1841371 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -343,6 +343,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -372,6 +373,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t za_state_size(struct task_struct const *task) { diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index 81bbfa3a0..a1020285e 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,8 +2,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -extern void arch_irq_work_raise(void); - static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index cea441b6a..b5bd3c38a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61f22e9c9..74584597b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -390,6 +390,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -403,6 +404,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -415,6 +417,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8c226d79a..59b5a16ba 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1347,6 +1347,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 38dbd3828..d2850b49a 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <asm/daifflags.h> #include <asm/exception.h> +#include <asm/numa.h> #include <asm/vmap_stack.h> #include <asm/softirq_stack.h> @@ -46,17 +47,17 @@ static void init_irq_scs(void) for_each_possible_cpu(cpu) per_cpu(irq_shadow_call_stack_ptr, cpu) = - scs_alloc(cpu_to_node(cpu)); + scs_alloc(early_cpu_to_node(cpu)); } #ifdef CONFIG_VMAP_STACK -static void init_irq_stacks(void) +static void __init init_irq_stacks(void) { int cpu; unsigned long *p; for_each_possible_cpu(cpu) { - p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu)); + p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu)); per_cpu(irq_stack_ptr, cpu) = p; } } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7b0643fe2..214b1805e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -168,7 +168,11 @@ armv8pmu_events_sysfs_show(struct device *dev, PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) static struct attribute *armv8_pmuv3_event_attrs[] = { - ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + /* + * Don't expose the sw_incr event in /sys. It's not usable as writes to + * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation + * means we don't have a fixed event<->counter relationship regardless. + */ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8b02d3108..064d996cc 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -11,6 +11,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/fpsimd.h> #include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> @@ -77,6 +78,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66ca..d513533cc 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -68,11 +68,9 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -Wdeclaration-after-statement \ -std=gnu11 VDSO_CFLAGS += -O2 # Some useful compiler-dependent flags from top-level Makefile -VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) VDSO_CFLAGS += -fno-strict-overflow VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 3c344e4cd..092327665 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); @@ -1427,6 +1430,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); diff --git a/arch/csky/include/asm/irq_work.h b/arch/csky/include/asm/irq_work.h index 33aaf39d6..d39fcc1f5 100644 --- a/arch/csky/include/asm/irq_work.h +++ b/arch/csky/include/asm/irq_work.h @@ -7,5 +7,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void); + #endif /* __ASM_CSKY_IRQ_WORK_H */ diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b16..ef2e37a10 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e737dc8cd..fa3171f56 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -9,6 +9,7 @@ config LOONGARCH select ARCH_BINFMT_ELF_STATE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL @@ -80,6 +81,7 @@ config LOONGARCH select GPIOLIB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -461,23 +463,6 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc/<pid>/seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL @@ -495,10 +480,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See <file:Documentation/mm/numa.rst> for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 434bfc1cd..18a2b37f4 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -297,6 +297,7 @@ void play_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); @@ -473,7 +474,6 @@ asmlinkage void start_secondary(void) sync_counter(); cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); - rcu_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index da3681f13..eeb2d815c 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -271,12 +271,16 @@ void setup_tlb_handler(int cpu) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - } + } else { + int vec_sz __maybe_unused; + void *addr __maybe_unused; + struct page *page __maybe_unused; + + /* Avoid lockdep warning */ + rcu_cpu_starting(cpu); + #ifdef CONFIG_NUMA - else { - void *addr; - struct page *page; - const int vec_sz = sizeof(exception_handlers); + vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) return; @@ -292,8 +296,8 @@ void setup_tlb_handler(int cpu) csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); - } #endif + } } void tlb_init(int cpu) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 4044eaf98..0921ddda1 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " .set pop" : "=&r" (sum), "=&r" (tmp) : "r" (saddr), "r" (daddr), - "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)); + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "memory"); return csum_fold(sum); } diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index c5c6864e6..405c85173 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -36,7 +36,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -50,7 +50,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 246c6a6b0..5b778995d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 345d5e021..abf39ecda 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -24,7 +24,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_KERNEL_BZIP2 diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e7..5937d5eda 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 000000000..4ea23e3d7 --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include <asm/ptrace.h> +#include <linux/compiler.h> + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b..317ebc5ed 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0..51f40eaf7 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 416507989..88d0ae576 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include <asm/page.h> #include <asm/cache.h> +#include <asm/extable.h> #include <linux/bug.h> #include <linux/string.h> @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f12b9f31..a58292873 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1003,6 +1003,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 1f6c776d8..c67883487 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -171,7 +171,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -466,13 +465,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index e8a4d77cf..8a8e7d722 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -118,8 +118,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -150,8 +150,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -187,8 +187,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -207,9 +207,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -242,8 +242,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -283,8 +283,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -327,10 +327,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -356,11 +356,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index b00aa98b5..fbd9ada5e 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 61a473635..20d5052e2 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -74,7 +74,7 @@ ##__VA_ARGS__) #define WARN_ENTRY(insn, flags, label, ...) \ - asm_volatile_goto( \ + asm goto( \ "1: " insn "\n" \ EX_TABLE(1b, %l[label]) \ _EMIT_BUG_ENTRY \ diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h index b8b0be8f1..c6d3078bd 100644 --- a/arch/powerpc/include/asm/irq_work.h +++ b/arch/powerpc/include/asm/irq_work.h @@ -6,6 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } -extern void arch_irq_work_raise(void); #endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec25..2f2a86ed2 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 94b981152..07fabb054 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -417,5 +417,9 @@ extern void *abatron_pteptrs[2]; #include <asm/nohash/mmu.h> #endif +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) +#define __HAVE_ARCH_RESERVED_KERNEL_PAGES +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 4c6c6dbd1..da827d2d0 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -42,14 +42,6 @@ u64 memory_hotplug_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NUMA */ -#ifdef CONFIG_FA_DUMP -#define __HAVE_ARCH_RESERVED_KERNEL_PAGES -#endif - -#ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index af58f1ed3..c4b798aa6 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 3ddc65c63..45d4c9cf3 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -72,7 +72,7 @@ __pu_failed: \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -85,7 +85,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -132,7 +132,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -145,7 +145,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109..2ae8e9a7b 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index a019ed6fc..26c151e2a 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 9dc0ad3c5..5a6e44e4d 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ again: * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3956f3268..362b71238 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 398b5694a..ec30af8ea 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index f6151a589..87aa76c73 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -463,6 +463,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, set_pte_at(vma->vm_mm, addr, ptep, pte); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * For hash translation mode, we use the deposited table to store hash slot * information and they are stored at PTRS_PER_PMD offset from related pmd @@ -484,6 +485,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } +#endif /* * Does the CPU support tlbie? diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 119ef491f..d3a7726ec 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift) * as to leave enough 0 bits in the address to contain it. */ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; + struct kmem_cache *new = NULL; /* It would be nice if this was a BUILD_BUG_ON(), but at the * moment, gcc doesn't seem to recognize is_power_of_2 as a @@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); + if (name) + new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); if (!new) panic("Could not allocate pgtable cache for order %d", shift); diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d..aa9aa1192 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index bd9784f77..71250605b 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -179,3 +179,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void) { return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); } + +#ifdef CONFIG_MEMORY_HOTPLUG +int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 97b026130..1e5f083cd 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -569,29 +569,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -719,26 +696,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; + } + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -746,17 +768,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -766,13 +791,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -960,6 +996,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1525,8 +1567,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1539,7 +1581,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1552,8 +1594,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 541199c6a..5186d65d7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -660,8 +660,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index d47d87c2d..dcf1bc9de 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index b53891964..b27a4d64f 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -6,5 +6,5 @@ static inline bool arch_irq_work_has_interrupt(void) { return IS_ENABLED(CONFIG_SMP); } -extern void arch_irq_work_raise(void); + #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8..4a35d787c 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 59bb53da4..63055c6ad 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -79,7 +79,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index ab333cb79..4c0805d26 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -41,6 +42,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 000000000..c8115ec8f --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include <linux/export.h> +#include <linux/kprobes.h> +#include <linux/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h index 603783766..f00c9f610 100644 --- a/arch/s390/include/asm/irq_work.h +++ b/arch/s390/include/asm/irq_work.h @@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void) return true; } -void arch_irq_work_raise(void); - #endif /* _ASM_S390_IRQ_WORK_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bb..bf78cf381 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ label: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed..09abf0003 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -777,6 +777,13 @@ struct kvm_vm_stat { u64 inject_service_signal; u64 inject_virtio; u64 aen_forward; + u64 gmap_shadow_create; + u64 gmap_shadow_reuse; + u64 gmap_shadow_r1_entry; + u64 gmap_shadow_r2_entry; + u64 gmap_shadow_r3_entry; + u64 gmap_shadow_sg_entry; + u64 gmap_shadow_pg_entry; }; struct kvm_arch_memory_slot { diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 092b16b4d..6b442edb3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -385,6 +385,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if ((unsigned int) data != 0 || test_fp_ctl(data >> (BITS_PER_LONG - 32))) return -EINVAL; @@ -741,6 +742,7 @@ static int __poke_user_compat(struct task_struct *child, /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if (test_fp_ctl(tmp)) return -EINVAL; child->thread.fpu.fpc = data; @@ -904,9 +906,7 @@ static int s390_fpregs_set(struct task_struct *target, int rc = 0; freg_t fprs[__NUM_FPRS]; - if (target == current) - save_fpu_regs(); - + save_fpu_regs(); if (MACHINE_HAS_VX) convert_vx_to_fp(fprs, target->thread.fpu.vxrs); else diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38..3beceff5f 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1273,6 +1273,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + struct kvm *kvm; struct gmap *parent; union asce asce; union vaddress vaddr; @@ -1281,6 +1282,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, *fake = 0; *dat_protection = 0; + kvm = sg->private; parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; @@ -1341,6 +1343,7 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r1_entry++; } fallthrough; case ASCE_TYPE_REGION2: { @@ -1369,6 +1372,7 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r2_entry++; } fallthrough; case ASCE_TYPE_REGION3: { @@ -1406,6 +1410,7 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r3_entry++; } fallthrough; case ASCE_TYPE_SEGMENT: { @@ -1439,6 +1444,7 @@ shadow_pgt: rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_sg_entry++; } } /* Return the parent address of the page table */ @@ -1509,6 +1515,7 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + vcpu->kvm->stat.gmap_shadow_pg_entry++; ipte_unlock(vcpu->kvm); mmap_read_unlock(sg->mm); return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 377536347..348d49268 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_pfault_done), STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio), - STATS_DESC_COUNTER(VM, aen_forward) + STATS_DESC_COUNTER(VM, aen_forward), + STATS_DESC_COUNTER(VM, gmap_shadow_reuse), + STATS_DESC_COUNTER(VM, gmap_shadow_create), + STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), }; const struct kvm_stats_header kvm_vm_stats_header = { @@ -4138,10 +4145,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_load(vcpu); - if (test_fp_ctl(fpu->fpc)) { - ret = -EINVAL; - goto out; - } vcpu->run->s.regs.fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, @@ -4149,7 +4152,6 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); -out: vcpu_put(vcpu); return ret; } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 740f8b56e..d90c818a9 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1206,15 +1206,17 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, * we're holding has been unshadowed. If the gmap is still valid, * we can safely reuse it. */ - if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) + if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) { + vcpu->kvm->stat.gmap_shadow_reuse++; return 0; + } /* release the old shadow - if any, and mark the prefix as unmapped */ release_gmap_shadow(vsie_page); gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); - gmap->private = vcpu->kvm; + vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 243f673fa..662cf23a1 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1675,6 +1675,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, return ERR_PTR(-ENOMEM); new->mm = parent->mm; new->parent = gmap_get(parent); + new->private = parent->private; new->orig_asce = asce; new->edat_level = edat_level; new->initialized = false; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2c99f9552..394c69fda 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dc..2718cbea8 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/um/Makefile b/arch/um/Makefile index 3dbd0e3b6..778c50f27 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 3d7836c46..cabcc501b 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -204,7 +204,7 @@ static int uml_net_close(struct net_device *dev) return 0; } -static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct uml_net_private *lp = netdev_priv(dev); unsigned long flags; diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526..66fe06db8 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8b8b4f07..444bae755 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -50,7 +50,7 @@ extern void do_uml_exitcalls(void); * Are we disallowed to sleep? Used to choose between GFP_KERNEL and * GFP_ATOMIC. */ -extern int __cant_sleep(void); +extern int __uml_cant_sleep(void); extern int get_current_pid(void); extern int copy_from_user_proc(void *to, void *from, int size); extern char *uml_strdup(const char *string); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 010bc422a..a351c87db 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -220,7 +220,7 @@ void arch_cpu_idle(void) raw_local_irq_enable(); } -int __cant_sleep(void) { +int __uml_cant_sleep(void) { return in_atomic() || irqs_disabled() || in_interrupt(); /* Is in_interrupt() really needed? */ } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index fddd1dec2..3e270da6b 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -432,9 +432,29 @@ static void time_travel_update_time(unsigned long long next, bool idle) time_travel_del_event(&ne); } +static void time_travel_update_time_rel(unsigned long long offs) +{ + unsigned long flags; + + /* + * Disable interrupts before calculating the new time so + * that a real timer interrupt (signal) can't happen at + * a bad time e.g. after we read time_travel_time but + * before we've completed updating the time. + */ + local_irq_save(flags); + time_travel_update_time(time_travel_time + offs, false); + local_irq_restore(flags); +} + void time_travel_ndelay(unsigned long nsec) { - time_travel_update_time(time_travel_time + nsec, false); + /* + * Not strictly needed to use _rel() version since this is + * only used in INFCPU/EXT modes, but it doesn't hurt and + * is more readable too. + */ + time_travel_update_time_rel(nsec); } EXPORT_SYMBOL(time_travel_ndelay); @@ -568,7 +588,11 @@ static void time_travel_set_start(void) #define time_travel_time 0 #define time_travel_ext_waiting 0 -static inline void time_travel_update_time(unsigned long long ns, bool retearly) +static inline void time_travel_update_time(unsigned long long ns, bool idle) +{ +} + +static inline void time_travel_update_time_rel(unsigned long long offs) { } @@ -720,9 +744,7 @@ static u64 timer_read(struct clocksource *cs) */ if (!irqs_disabled() && !in_interrupt() && !in_softirq() && !time_travel_ext_waiting) - time_travel_update_time(time_travel_time + - TIMER_MULTIPLIER, - false); + time_travel_update_time_rel(TIMER_MULTIPLIER); return time_travel_time / TIMER_MULTIPLIER; } diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index b459745f5..3cb8ac63b 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -46,7 +46,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) unsigned long stack, sp; int pid, fds[2], ret, n; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; @@ -70,7 +70,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) data.pre_data = pre_data; data.argv = argv; data.fd = fds[1]; - data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); pid = clone(helper_child, (void *) sp, CLONE_VM, &data); if (pid < 0) { @@ -121,7 +121,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(0, __cant_sleep()); + stack = alloc_stack(0, __uml_cant_sleep()); if (stack == 0) return -ENOMEM; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index fc0f2a9de..1dca4ffbd 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -173,23 +173,38 @@ __uml_setup("quiet", quiet_cmd_param, "quiet\n" " Turns off information messages during boot.\n\n"); +/* + * The os_info/os_warn functions will be called by helper threads. These + * have a very limited stack size and using the libc formatting functions + * may overflow the stack. + * So pull in the kernel vscnprintf and use that instead with a fixed + * on-stack buffer. + */ +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); + void os_info(const char *fmt, ...) { + char buf[256]; va_list list; + int len; if (quiet_info) return; va_start(list, fmt); - vfprintf(stderr, fmt, list); + len = vscnprintf(buf, sizeof(buf), fmt, list); + fwrite(buf, len, 1, stderr); va_end(list); } void os_warn(const char *fmt, ...) { + char buf[256]; va_list list; + int len; va_start(list, fmt); - vfprintf(stderr, fmt, list); + len = vscnprintf(buf, sizeof(buf), fmt, list); + fwrite(buf, len, 1, stderr); va_end(list); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c9bfc4be..5caa023e9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1982,6 +1982,23 @@ config EFI_STUB See Documentation/admin-guide/efi-stub.rst for more information. +config EFI_HANDOVER_PROTOCOL + bool "EFI handover protocol (DEPRECATED)" + depends on EFI_STUB + default y + help + Select this in order to include support for the deprecated EFI + handover protocol, which defines alternative entry points into the + EFI stub. This is a practice that has no basis in the UEFI + specification, and requires a priori knowledge on the part of the + bootloader about Linux/x86 specific ways of passing the command line + and initrd, and where in memory those assets may be loaded. + + If in doubt, say Y. Even though the corresponding support is not + present in upstream GRUB or other bootloaders, most distros build + GRUB with numerous downstream patches applied, and may rely on the + handover protocol as as result. + config EFI_MIXED bool "EFI mixed-mode support" depends on EFI_STUB && X86_64 @@ -2548,6 +2565,17 @@ config GDS_FORCE_MITIGATION If in doubt, say N. +config MITIGATION_RFDS + bool "RFDS Mitigation" + depends on CPU_SUP_INTEL + default y + help + Enable mitigation for Register File Data Sampling (RFDS) by default. + RFDS is a hardware vulnerability which affects Intel Atom CPUs. It + allows unprivileged speculative access to stale data previously + stored in floating point, vector and integer registers. + See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd4..ce5ed2c2d 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -375,7 +375,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 15b7b403a..3965b2c9e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy @@ -100,7 +105,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/ident_map_64.o vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o - vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif @@ -108,11 +113,11 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o -vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o -efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o +vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 9caf89063..55c98fdd6 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -30,13 +30,13 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to * ACPI_TABLE_GUID because it has more features. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_20_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ - rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + rsdp_addr = efi_find_vendor_table(boot_params_ptr, cfg_tbl_pa, cfg_tbl_len, ACPI_TABLE_GUID); if (rsdp_addr) return (acpi_physical_address)rsdp_addr; @@ -56,15 +56,15 @@ static acpi_physical_address efi_get_rsdp_addr(void) enum efi_type et; int ret; - et = efi_get_type(boot_params); + et = efi_get_type(boot_params_ptr); if (et == EFI_TYPE_NONE) return 0; - systab_pa = efi_get_system_table(boot_params); + systab_pa = efi_get_system_table(boot_params_ptr); if (!systab_pa) error("EFI support advertised, but unable to locate system table."); - ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + ret = efi_get_conf_table(boot_params_ptr, &cfg_tbl_pa, &cfg_tbl_len); if (ret || !cfg_tbl_pa) error("EFI config table not found."); @@ -156,7 +156,7 @@ acpi_physical_address get_rsdp_addr(void) { acpi_physical_address pa; - pa = boot_params->acpi_rsdp_addr; + pa = boot_params_ptr->acpi_rsdp_addr; if (!pa) pa = efi_get_rsdp_addr(); @@ -210,7 +210,7 @@ static unsigned long get_acpi_srat_table(void) rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); if (!rsdp) rsdp = (struct acpi_table_rsdp *)(long) - boot_params->acpi_rsdp_addr; + boot_params_ptr->acpi_rsdp_addr; if (!rsdp) return 0; diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index f1add5d85..c1bb18097 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -14,9 +14,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params_ptr->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params_ptr->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S new file mode 100644 index 000000000..8232c5b2a --- /dev/null +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT and IDT before we make EFI service + * calls. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identity + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include <linux/linkage.h> +#include <asm/msr.h> +#include <asm/page_types.h> +#include <asm/processor-flags.h> +#include <asm/segment.h> + + .code64 + .text +/* + * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() + * is the first thing that runs after switching to long mode. Depending on + * whether the EFI handover protocol or the compat entry point was used to + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF + * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a + * struct bootparams pointer as the third argument, so the presence of such a + * pointer is used to disambiguate. + * + * +--------------+ + * +------------------+ +------------+ +------>| efi_pe_entry | + * | efi32_pe_entry |---->| | | +-----------+--+ + * +------------------+ | | +------+----------------+ | + * | startup_32 |---->| startup_64_mixed_mode | | + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ + */ +SYM_FUNC_START(startup_64_mixed_mode) + lea efi32_boot_args(%rip), %rdx + mov 0(%rdx), %edi + mov 4(%rdx), %esi +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + mov 8(%rdx), %edx // saved bootparams pointer + test %edx, %edx + jnz efi_stub_entry +#endif + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + sub $40, %rsp + mov %rdi, %rcx // MS calling convention + mov %rsi, %rdx + jmp efi_pe_entry +SYM_FUNC_END(startup_64_mixed_mode) + +SYM_FUNC_START(__efi64_thunk) + push %rbp + push %rbx + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* Copy args passed on stack */ + movq 0x30(%rsp), %rbp + movq 0x38(%rsp), %rbx + movq 0x40(%rsp), %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $64, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movl %r8d, 0xc(%rsp) + movl %r9d, 0x10(%rsp) + movl %ebp, 0x14(%rsp) + movl %ebx, 0x18(%rsp) + movl %eax, 0x1c(%rsp) + + leaq 0x20(%rsp), %rbx + sgdt (%rbx) + sidt 16(%rbx) + + leaq 1f(%rip), %rbp + + /* + * Switch to IDT and GDT with 32-bit segments. These are the firmware + * GDT and IDT that were installed when the kernel started executing. + * The pointers were saved by the efi32_entry() routine below. + * + * Pass the saved DS selector to the 32-bit code, and use far return to + * restore the saved CS selector. + */ + lidt efi32_boot_idt(%rip) + lgdt efi32_boot_gdt(%rip) + + movzwl efi32_boot_ds(%rip), %edx + movzwq efi32_boot_cs(%rip), %rax + pushq %rax + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $64, %rsp + movq %rdi, %rax + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* Clear out 32-bit selector from FS and GS */ + xorl %ebx, %ebx + movl %ebx, %fs + movl %ebx, %gs + + /* + * Convert 32-bit status code into 64-bit. + */ + roll $1, %eax + rorq $1, %rax + + pop %rbx + pop %rbp + RET +SYM_FUNC_END(__efi64_thunk) + + .code32 +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) +#endif + +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +SYM_FUNC_START_LOCAL(efi_enter32) + /* Load firmware selector into data and stack segment registers */ + movl %edx, %ds + movl %edx, %es + movl %edx, %fs + movl %edx, %gs + movl %edx, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs and IDTs. + */ + cli + + lidtl 16(%ebx) + lgdtl (%ebx) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + pushl $__KERNEL_CS + pushl %ebp + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +SYM_FUNC_END(efi_enter32) + +/* + * This is the common EFI stub entry point for mixed mode. + * + * Arguments: %ecx image handle + * %edx EFI system table pointer + * %esi struct bootparams pointer (or NULL when not using + * the EFI handover protocol) + * + * Since this is the point of no return for ordinary execution, no registers + * are considered live except for the function parameters. [Note that the EFI + * stub may still exit and return to the firmware using the Exit() EFI boot + * service.] + */ +SYM_FUNC_START_LOCAL(efi32_entry) + call 1f +1: pop %ebx + + /* Save firmware GDTR and code/data selectors */ + sgdtl (efi32_boot_gdt - 1b)(%ebx) + movw %cs, (efi32_boot_cs - 1b)(%ebx) + movw %ds, (efi32_boot_ds - 1b)(%ebx) + + /* Store firmware IDT descriptor */ + sidtl (efi32_boot_idt - 1b)(%ebx) + + /* Store boot arguments */ + leal (efi32_boot_args - 1b)(%ebx), %ebx + movl %ecx, 0(%ebx) + movl %edx, 4(%ebx) + movl %esi, 8(%ebx) + movb $0x0, 12(%ebx) // efi_is64 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + jmp startup_32 +SYM_FUNC_END(efi32_entry) + +/* + * efi_status_t efi32_pe_entry(efi_handle_t image_handle, + * efi_system_table_32_t *sys_table) + */ +SYM_FUNC_START(efi32_pe_entry) + pushl %ebp + movl %esp, %ebp + pushl %ebx // save callee-save registers + pushl %edi + + call verify_cpu // check for long mode support + testl %eax, %eax + movl $0x80000003, %eax // EFI_UNSUPPORTED + jnz 2f + + movl 8(%ebp), %ecx // image_handle + movl 12(%ebp), %edx // sys_table + xorl %esi, %esi + jmp efi32_entry // pass %ecx, %edx, %esi + // no other registers remain live + +2: popl %edi // restore callee-save registers + popl %ebx + leave + RET +SYM_FUNC_END(efi32_pe_entry) + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + .org efi32_stub_entry + 0x200 + .code64 +SYM_FUNC_START_NOALIGN(efi64_stub_entry) + jmp efi_handover_entry +SYM_FUNC_END(efi64_stub_entry) +#endif + + .data + .balign 8 +SYM_DATA_START_LOCAL(efi32_boot_gdt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_gdt) + +SYM_DATA_START_LOCAL(efi32_boot_idt) + .word 0 + .quad 0 +SYM_DATA_END(efi32_boot_idt) + +SYM_DATA_LOCAL(efi32_boot_cs, .word 0) +SYM_DATA_LOCAL(efi32_boot_ds, .word 0) +SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) +SYM_DATA(efi_is64, .byte 1) diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S deleted file mode 100644 index 67e7edcdf..000000000 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming - * - * Early support for invoking 32-bit EFI services from a 64-bit kernel. - * - * Because this thunking occurs before ExitBootServices() we have to - * restore the firmware's 32-bit GDT and IDT before we make EFI service - * calls. - * - * On the plus side, we don't have to worry about mangling 64-bit - * addresses into 32-bits because we're executing with an identity - * mapped pagetable and haven't transitioned to 64-bit virtual addresses - * yet. - */ - -#include <linux/linkage.h> -#include <asm/msr.h> -#include <asm/page_types.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> - - .code64 - .text -SYM_FUNC_START(__efi64_thunk) - push %rbp - push %rbx - - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - /* Copy args passed on stack */ - movq 0x30(%rsp), %rbp - movq 0x38(%rsp), %rbx - movq 0x40(%rsp), %rax - - /* - * Convert x86-64 ABI params to i386 ABI - */ - subq $64, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movl %r8d, 0xc(%rsp) - movl %r9d, 0x10(%rsp) - movl %ebp, 0x14(%rsp) - movl %ebx, 0x18(%rsp) - movl %eax, 0x1c(%rsp) - - leaq 0x20(%rsp), %rbx - sgdt (%rbx) - - addq $16, %rbx - sidt (%rbx) - - leaq 1f(%rip), %rbp - - /* - * Switch to IDT and GDT with 32-bit segments. This is the firmware GDT - * and IDT that was installed when the kernel started executing. The - * pointers were saved at the EFI stub entry point in head_64.S. - * - * Pass the saved DS selector to the 32-bit code, and use far return to - * restore the saved CS selector. - */ - leaq efi32_boot_idt(%rip), %rax - lidt (%rax) - leaq efi32_boot_gdt(%rip), %rax - lgdt (%rax) - - movzwl efi32_boot_ds(%rip), %edx - movzwq efi32_boot_cs(%rip), %rax - pushq %rax - leaq efi_enter32(%rip), %rax - pushq %rax - lretq - -1: addq $64, %rsp - movq %rdi, %rax - - pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - /* Clear out 32-bit selector from FS and GS */ - xorl %ebx, %ebx - movl %ebx, %fs - movl %ebx, %gs - - /* - * Convert 32-bit status code into 64-bit. - */ - roll $1, %eax - rorq $1, %rax - - pop %rbx - pop %rbp - RET -SYM_FUNC_END(__efi64_thunk) - - .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - /* Load firmware selector into data and stack segment registers */ - movl %edx, %ds - movl %edx, %es - movl %edx, %fs - movl %edx, %gs - movl %edx, %ss - - /* Reload pgtables */ - movl %cr3, %eax - movl %eax, %cr3 - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - /* Disable long mode via EFER */ - movl $MSR_EFER, %ecx - rdmsr - btrl $_EFER_LME, %eax - wrmsr - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - /* - * Some firmware will return with interrupts enabled. Be sure to - * disable them before we switch GDTs and IDTs. - */ - cli - - lidtl (%ebx) - subl $16, %ebx - - lgdtl (%ebx) - - movl %cr4, %eax - btsl $(X86_CR4_PAE_BIT), %eax - movl %eax, %cr4 - - movl %cr3, %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - rdmsr - btsl $_EFER_LME, %eax - wrmsr - - xorl %eax, %eax - lldt %ax - - pushl $__KERNEL_CS - pushl %ebp - - /* Enable paging */ - movl %cr0, %eax - btsl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -SYM_DATA_START(efi32_boot_gdt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_gdt) - -SYM_DATA_START(efi32_boot_idt) - .word 0 - .quad 0 -SYM_DATA_END(efi32_boot_idt) - -SYM_DATA_START(efi32_boot_cs) - .word 0 -SYM_DATA_END(efi32_boot_cs) - -SYM_DATA_START(efi32_boot_ds) - .word 0 -SYM_DATA_END(efi32_boot_ds) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3b354eb95..1cfe9802a 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -179,15 +155,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) */ /* push arguments for extract_kernel: */ - pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ pushl %ebp /* output address */ - pushl input_len@GOTOFF(%ebx) /* input_len */ - leal input_data@GOTOFF(%ebx), %eax - pushl %eax /* input_data */ - leal boot_heap@GOTOFF(%ebx), %eax - pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call extract_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp /* @@ -208,17 +178,11 @@ SYM_DATA_START_LOCAL(gdt) .quad 0x00cf92000000ffff /* __KERNEL_DS */ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif - /* * Stack and heap for uncompression */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index b4bd6df29..0d7aef10b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -118,7 +118,9 @@ SYM_FUNC_START(startup_32) 1: /* Setup Exception handling for SEV-ES */ +#ifdef CONFIG_AMD_MEM_ENCRYPT call startup32_load_idt +#endif /* Make sure cpu supports long mode. */ call verify_cpu @@ -136,19 +138,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -178,12 +167,13 @@ SYM_FUNC_START(startup_32) */ /* * If SEV is active then set the encryption mask in the page tables. - * This will insure that when the kernel is copied and decompressed + * This will ensure that when the kernel is copied and decompressed * it will be done so encrypted. */ - call get_sev_encryption_bit xorl %edx, %edx #ifdef CONFIG_AMD_MEM_ENCRYPT + call get_sev_encryption_bit + xorl %edx, %edx testl %eax, %eax jz 1f subl $32, %eax /* Encryption bit is always above bit 31 */ @@ -249,6 +239,11 @@ SYM_FUNC_START(startup_32) movl $__BOOT_TSS, %eax ltr %ax +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* Check if the C-bit position is correct when SEV is active */ + call startup32_check_sev_cbit +#endif + /* * Setup for the jump to 64bit mode * @@ -261,29 +256,11 @@ SYM_FUNC_START(startup_32) */ leal rva(startup_64)(%ebp), %eax #ifdef CONFIG_EFI_MIXED - movl rva(efi32_boot_args)(%ebp), %edi - testl %edi, %edi - jz 1f - leal rva(efi64_stub_entry)(%ebp), %eax - movl rva(efi32_boot_args+4)(%ebp), %esi - movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer - testl %edx, %edx - jnz 1f - /* - * efi_pe_entry uses MS calling convention, which requires 32 bytes of - * shadow space on the stack even if all arguments are passed in - * registers. We also need an additional 8 bytes for the space that - * would be occupied by the return address, and this also results in - * the correct stack alignment for entry. - */ - subl $40, %esp - leal rva(efi_pe_entry)(%ebp), %eax - movl %edi, %ecx // MS calling convention - movl %esi, %edx + cmpb $1, rva(efi_is64)(%ebp) + je 1f + leal rva(startup_64_mixed_mode)(%ebp), %eax 1: #endif - /* Check if the C-bit position is correct when SEV is active */ - call startup32_check_sev_cbit pushl $__KERNEL_CS pushl %eax @@ -296,41 +273,6 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_MIXED - .org 0x190 -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - - call 1f -1: pop %ebp - subl $ rva(1b), %ebp - - movl %esi, rva(efi32_boot_args+8)(%ebp) -SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL) - movl %ecx, rva(efi32_boot_args)(%ebp) - movl %edx, rva(efi32_boot_args+4)(%ebp) - movb $0, rva(efi_is64)(%ebp) - - /* Save firmware GDTR and code/data selectors */ - sgdtl rva(efi32_boot_gdt)(%ebp) - movw %cs, rva(efi32_boot_cs)(%ebp) - movw %ds, rva(efi32_boot_ds)(%ebp) - - /* Store firmware IDT descriptor */ - sidtl rva(efi32_boot_idt)(%ebp) - - /* Disable paging */ - movl %cr0, %eax - btrl $X86_CR0_PG_BIT, %eax - movl %eax, %cr0 - - jmp startup_32 -SYM_FUNC_END(efi32_stub_entry) -#endif - .code64 .org 0x200 SYM_CODE_START(startup_64) @@ -372,20 +314,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp @@ -424,10 +352,6 @@ SYM_CODE_START(startup_64) * For the trampoline, we need the top page table to reside in lower * memory as we don't have a way to load 64-bit values into CR3 in * 32-bit mode. - * - * We go though the trampoline even if we don't have to: if we're - * already in a desired paging mode. This way the trampoline code gets - * tested on every boot. */ /* Make sure we have GDT with 32-bit code segment */ @@ -442,10 +366,14 @@ SYM_CODE_START(startup_64) lretq .Lon_kernel_cs: + /* + * RSI holds a pointer to a boot_params structure provided by the + * loader, and this needs to be preserved across C function calls. So + * move it into a callee saved register. + */ + movq %rsi, %r15 - pushq %rsi call load_stage1_idt - popq %rsi #ifdef CONFIG_AMD_MEM_ENCRYPT /* @@ -456,82 +384,24 @@ SYM_CODE_START(startup_64) * CPUID instructions being issued, so go ahead and do that now via * sev_enable(), which will also handle the rest of the SEV-related * detection/setup to ensure that has been done in advance of any dependent - * code. + * code. Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call sev_enable - popq %rsi #endif /* - * paging_prepare() sets up the trampoline and checks if we need to - * enable 5-level paging. - * - * paging_prepare() returns a two-quadword structure which lands - * into RDX:RAX: - * - Address of the trampoline is returned in RAX. - * - Non zero RDX means trampoline needs to enable 5-level - * paging. - * - * RSI holds real mode data and needs to be preserved across - * this function call. - */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ - call paging_prepare - popq %rsi - - /* Save the trampoline address in RCX */ - movq %rax, %rcx - - /* Set up 32-bit addressable stack */ - leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp - - /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. - */ - pushq %rbp - pushq %rbx - pushq %rsi - - /* - * Push the 64-bit address of trampoline_return() onto the new stack. - * It will be used by the trampoline to return to the main code. Due to - * the 32-bit mode switch, it cannot be kept it in a register either. - */ - leaq trampoline_return(%rip), %rdi - pushq %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS - leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore live 64-bit registers */ - popq %rsi - popq %rbx - popq %rbp - - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp - - /* - * cleanup_trampoline() would restore trampoline memory. - * - * RDI is address of the page table to use instead of page table - * in trampoline memory (if required). + * configure_5level_paging() updates the number of paging levels using + * a trampoline in 32-bit addressable memory if the current number does + * not match the desired number. * - * RSI holds real mode data and needs to be preserved across - * this function call. + * Pass the boot_params pointer as the first argument. The second + * argument is the relocated address of the page table to use instead + * of the page table in trampoline memory (if required). */ - pushq %rsi - leaq rva(top_pgtable)(%rbx), %rdi - call cleanup_trampoline - popq %rsi + movq %r15, %rdi + leaq rva(top_pgtable)(%rbx), %rsi + call configure_5level_paging /* Zero EFLAGS */ pushq $0 @@ -541,7 +411,6 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi movl $(_bss - startup_32), %ecx @@ -549,7 +418,6 @@ trampoline_return: std rep movsq cld - popq %rsi /* * The GDT may get overwritten either during the copy we just did or @@ -568,19 +436,6 @@ trampoline_return: jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB - .org 0x390 -SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax -SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -594,125 +449,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq - pushq %rsi call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ - movq (%rsp), %rdi + movq %r15, %rdi call initialize_identity_maps - popq %rsi /* * Do the extraction, and jump to the new kernel.. */ - pushq %rsi /* Save the real mode argument */ - movq %rsi, %rdi /* real mode address */ - leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ - leaq input_data(%rip), %rdx /* input_data */ - movl input_len(%rip), %ecx /* input_len */ - movq %rbp, %r8 /* output target address */ - movl output_len(%rip), %r9d /* decompressed length, end of relocs */ - call extract_kernel /* returns kernel location in %rax */ - popq %rsi + /* pass struct boot_params pointer and output target address */ + movq %r15, %rdi + movq %rbp, %rsi + call extract_kernel /* returns kernel entry point in %rax */ /* * Jump to the decompressed kernel. */ + movq %r15, %rsi jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* - * This is the 32-bit trampoline that will be copied over to low memory. + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. * * Return address is at the top of the stack (might be above 4G). - * ECX contains the base address of the trampoline memory. - * Non zero RDX means trampoline needs to enable 5-level paging. + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. */ + .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) - /* Set up data and stack segments */ - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %ss + /* + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ +.Lret: + shlq $32, %rbx + orq %rbx, %rsp + + /* Restore the preserved 64-bit registers */ + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + + .code32 +0: /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Check what paging mode we want to be in after the trampoline */ - testl %edx, %edx - jz 1f - - /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jnz 3f - jmp 2f -1: - /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jz 3f -2: /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax - movl %eax, %cr3 -3: + movl %edi, %cr3 + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - pushl %ecx - pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax /* Avoid writing EFER if no change was made (for TDX guest) */ jc 1f wrmsr -1: popl %edx - popl %ecx - -#ifdef CONFIG_X86_MCE - /* - * Preserve CR4.MCE if the kernel will enable #MC support. - * Clearing MCE may fault in some environments (that also force #MC - * support). Any machine check that occurs before #MC support is fully - * configured will crash the system regardless of the CR4.MCE value set - * here. - */ - movl %cr4, %eax - andl $X86_CR4_MCE, %eax -#else - movl $0, %eax -#endif - - /* Enable PAE and LA57 (if required) paging modes */ - orl $X86_CR4_PAE, %eax - testl %edx, %edx - jz 1f - orl $X86_CR4_LA57, %eax 1: + /* Toggle CR4.LA57 */ + movl %cr4, %eax + btcl $X86_CR4_LA57_BIT, %eax movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - - /* Prepare the stack for far return to Long Mode */ - pushl $__KERNEL_CS - pushl %eax - /* Enable paging again. */ movl %cr0, %eax btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - lret + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - retq -SYM_FUNC_END(.Lpaging_enabled) +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) /* * The trampoline code has a size limit. @@ -721,7 +573,7 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 + .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: @@ -729,6 +581,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) jmp 1b SYM_FUNC_END(.Lno_longmode) + .globl verify_cpu #include "../../kernel/verify_cpu.S" .data @@ -760,249 +613,11 @@ SYM_DATA_START(boot_idt) .endr SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) -#ifdef CONFIG_AMD_MEM_ENCRYPT -SYM_DATA_START(boot32_idt_desc) - .word boot32_idt_end - boot32_idt - 1 - .long 0 -SYM_DATA_END(boot32_idt_desc) - .balign 8 -SYM_DATA_START(boot32_idt) - .rept 32 - .quad 0 - .endr -SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end) -#endif - -#ifdef CONFIG_EFI_STUB -SYM_DATA(image_offset, .long 0) -#endif -#ifdef CONFIG_EFI_MIXED -SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0) -SYM_DATA(efi_is64, .byte 1) - -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - - __HEAD - .code32 -SYM_FUNC_START(efi32_pe_entry) -/* - * efi_status_t efi32_pe_entry(efi_handle_t image_handle, - * efi_system_table_32_t *sys_table) - */ - - pushl %ebp - movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - - pushl %ebx // save callee-save registers - pushl %edi - - call verify_cpu // check for long mode support - testl %eax, %eax - movl $0x80000003, %eax // EFI_UNSUPPORTED - jnz 2f - - call 1f -1: pop %ebx - subl $ rva(1b), %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal rva(loaded_image_proto)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - - movl 8(%ebp), %ecx // image_handle - movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - movl %ebx, %ebp // startup_32 for efi32_pe_stub_entry - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebx - movl %ebx, rva(image_offset)(%ebp) // save image_offset - jmp efi32_pe_stub_entry - -2: popl %edi // restore callee-save registers - popl %ebx - leave - RET -SYM_FUNC_END(efi32_pe_entry) - - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) -#endif - -#ifdef CONFIG_AMD_MEM_ENCRYPT - __HEAD - .code32 -/* - * Write an IDT entry into boot32_idt - * - * Parameters: - * - * %eax: Handler address - * %edx: Vector number - * - * Physical offset is expected in %ebp - */ -SYM_FUNC_START(startup32_set_idt_entry) - push %ebx - push %ecx - - /* IDT entry address to %ebx */ - leal rva(boot32_idt)(%ebp), %ebx - shl $3, %edx - addl %edx, %ebx - - /* Build IDT entry, lower 4 bytes */ - movl %eax, %edx - andl $0x0000ffff, %edx # Target code segment offset [15:0] - movl $__KERNEL32_CS, %ecx # Target code segment selector - shl $16, %ecx - orl %ecx, %edx - - /* Store lower 4 bytes to IDT */ - movl %edx, (%ebx) - - /* Build IDT entry, upper 4 bytes */ - movl %eax, %edx - andl $0xffff0000, %edx # Target code segment offset [31:16] - orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate - - /* Store upper 4 bytes to IDT */ - movl %edx, 4(%ebx) - - pop %ecx - pop %ebx - RET -SYM_FUNC_END(startup32_set_idt_entry) -#endif - -SYM_FUNC_START(startup32_load_idt) -#ifdef CONFIG_AMD_MEM_ENCRYPT - /* #VC handler */ - leal rva(startup32_vc_handler)(%ebp), %eax - movl $X86_TRAP_VC, %edx - call startup32_set_idt_entry - - /* Load IDT */ - leal rva(boot32_idt)(%ebp), %eax - movl %eax, rva(boot32_idt_desc+2)(%ebp) - lidt rva(boot32_idt_desc)(%ebp) -#endif - RET -SYM_FUNC_END(startup32_load_idt) - -/* - * Check for the correct C-bit position when the startup_32 boot-path is used. - * - * The check makes use of the fact that all memory is encrypted when paging is - * disabled. The function creates 64 bits of random data using the RDRAND - * instruction. RDRAND is mandatory for SEV guests, so always available. If the - * hypervisor violates that the kernel will crash right here. - * - * The 64 bits of random data are stored to a memory location and at the same - * time kept in the %eax and %ebx registers. Since encryption is always active - * when paging is off the random data will be stored encrypted in main memory. - * - * Then paging is enabled. When the C-bit position is correct all memory is - * still mapped encrypted and comparing the register values with memory will - * succeed. An incorrect C-bit position will map all memory unencrypted, so that - * the compare will use the encrypted random data and fail. - */ -SYM_FUNC_START(startup32_check_sev_cbit) -#ifdef CONFIG_AMD_MEM_ENCRYPT - pushl %eax - pushl %ebx - pushl %ecx - pushl %edx - - /* Check for non-zero sev_status */ - movl rva(sev_status)(%ebp), %eax - testl %eax, %eax - jz 4f - - /* - * Get two 32-bit random values - Don't bail out if RDRAND fails - * because it is better to prevent forward progress if no random value - * can be gathered. - */ -1: rdrand %eax - jnc 1b -2: rdrand %ebx - jnc 2b - - /* Store to memory and keep it in the registers */ - movl %eax, rva(sev_check_data)(%ebp) - movl %ebx, rva(sev_check_data+4)(%ebp) - - /* Enable paging to see if encryption is active */ - movl %cr0, %edx /* Backup %cr0 in %edx */ - movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ - movl %ecx, %cr0 - - cmpl %eax, rva(sev_check_data)(%ebp) - jne 3f - cmpl %ebx, rva(sev_check_data+4)(%ebp) - jne 3f - - movl %edx, %cr0 /* Restore previous %cr0 */ - - jmp 4f - -3: /* Check failed - hlt the machine */ - hlt - jmp 3b - -4: - popl %edx - popl %ecx - popl %ebx - popl %eax -#endif - RET -SYM_FUNC_END(startup32_check_sev_cbit) - /* * Stack and heap for uncompression */ .bss .balign 4 -SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) - SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 .balign 16 diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index b4155273d..b8c42339b 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -167,8 +167,9 @@ void initialize_identity_maps(void *rmode) * or does not touch all the pages covering them. */ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); - boot_params = rmode; - kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + boot_params_ptr = rmode; + kernel_add_identity_map((unsigned long)boot_params_ptr, + (unsigned long)(boot_params_ptr + 1)); cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); @@ -176,7 +177,7 @@ void initialize_identity_maps(void *rmode) * Also map the setup_data entries passed via boot_params in case they * need to be accessed by uncompressed kernel via the identity mapping. */ - sd = (struct setup_data *)boot_params->hdr.setup_data; + sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; while (sd) { unsigned long sd_addr = (unsigned long)sd; @@ -393,3 +394,8 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) */ kernel_add_identity_map(address, end); } + +void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code) +{ + /* Empty handler to ignore NMI during early boot */ +} diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 3cdf94b41..d100284bb 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -61,6 +61,7 @@ void load_stage2_idt(void) boot_idt_desc.address = (unsigned long)boot_idt; set_idt_entry(X86_TRAP_PF, boot_page_fault); + set_idt_entry(X86_TRAP_NMI, boot_nmi_trap); #ifdef CONFIG_AMD_MEM_ENCRYPT /* diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S index 22890e199..4d03c8562 100644 --- a/arch/x86/boot/compressed/idt_handlers_64.S +++ b/arch/x86/boot/compressed/idt_handlers_64.S @@ -70,6 +70,7 @@ SYM_FUNC_END(\name) .code64 EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1 +EXCEPTION_HANDLER boot_nmi_trap do_boot_nmi_trap error_code=0 #ifdef CONFIG_AMD_MEM_ENCRYPT EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1 diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e476bcbd9..9794d9174 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -63,7 +63,7 @@ static unsigned long get_boot_seed(void) unsigned long hash = 0; hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); + hash = rotate_xor(hash, boot_params_ptr, sizeof(*boot_params_ptr)); return hash; } @@ -383,7 +383,7 @@ static void handle_mem_options(void) static void mem_avoid_init(unsigned long input, unsigned long input_size, unsigned long output) { - unsigned long init_size = boot_params->hdr.init_size; + unsigned long init_size = boot_params_ptr->hdr.init_size; u64 initrd_start, initrd_size; unsigned long cmd_line, cmd_line_size; @@ -395,10 +395,10 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; /* Avoid initrd. */ - initrd_start = (u64)boot_params->ext_ramdisk_image << 32; - initrd_start |= boot_params->hdr.ramdisk_image; - initrd_size = (u64)boot_params->ext_ramdisk_size << 32; - initrd_size |= boot_params->hdr.ramdisk_size; + initrd_start = (u64)boot_params_ptr->ext_ramdisk_image << 32; + initrd_start |= boot_params_ptr->hdr.ramdisk_image; + initrd_size = (u64)boot_params_ptr->ext_ramdisk_size << 32; + initrd_size |= boot_params_ptr->hdr.ramdisk_size; mem_avoid[MEM_AVOID_INITRD].start = initrd_start; mem_avoid[MEM_AVOID_INITRD].size = initrd_size; /* No need to set mapping for initrd, it will be handled in VO. */ @@ -413,8 +413,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, } /* Avoid boot parameters. */ - mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; - mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params_ptr; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params_ptr); /* We don't need to set a mapping for setup_data. */ @@ -447,7 +447,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, } /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; + ptr = (struct setup_data *)(unsigned long)boot_params_ptr->hdr.setup_data; while (ptr) { struct mem_vector avoid; @@ -679,7 +679,7 @@ static bool process_mem_region(struct mem_vector *region, static bool process_efi_entries(unsigned long minimum, unsigned long image_size) { - struct efi_info *e = &boot_params->efi_info; + struct efi_info *e = &boot_params_ptr->efi_info; bool efi_mirror_found = false; struct mem_vector region; efi_memory_desc_t *md; @@ -761,8 +761,8 @@ static void process_e820_entries(unsigned long minimum, struct boot_e820_entry *entry; /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < boot_params->e820_entries; i++) { - entry = &boot_params->e820_table[i]; + for (i = 0; i < boot_params_ptr->e820_entries; i++) { + entry = &boot_params_ptr->e820_table[i]; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) continue; @@ -836,7 +836,7 @@ void choose_random_location(unsigned long input, return; } - boot_params->hdr.loadflags |= KASLR_FLAG; + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; if (IS_ENABLED(CONFIG_X86_32)) mem_limit = KERNEL_IMAGE_SIZE; diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a73e4d783..32f7cc8a8 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -12,16 +12,13 @@ #include <asm/processor-flags.h> #include <asm/msr.h> #include <asm/asm-offsets.h> +#include <asm/segment.h> +#include <asm/trapnr.h> .text .code32 SYM_FUNC_START(get_sev_encryption_bit) - xor %eax, %eax - -#ifdef CONFIG_AMD_MEM_ENCRYPT push %ebx - push %ecx - push %edx movl $0x80000000, %eax /* CPUID to check the highest leaf */ cpuid @@ -52,12 +49,7 @@ SYM_FUNC_START(get_sev_encryption_bit) xor %eax, %eax .Lsev_exit: - pop %edx - pop %ecx pop %ebx - -#endif /* CONFIG_AMD_MEM_ENCRYPT */ - RET SYM_FUNC_END(get_sev_encryption_bit) @@ -98,7 +90,7 @@ SYM_CODE_START_LOCAL(sev_es_req_cpuid) jmp 1b SYM_CODE_END(sev_es_req_cpuid) -SYM_CODE_START(startup32_vc_handler) +SYM_CODE_START_LOCAL(startup32_vc_handler) pushl %eax pushl %ebx pushl %ecx @@ -184,15 +176,149 @@ SYM_CODE_START(startup32_vc_handler) jmp .Lfail SYM_CODE_END(startup32_vc_handler) +/* + * Write an IDT entry into boot32_idt + * + * Parameters: + * + * %eax: Handler address + * %edx: Vector number + * %ecx: IDT address + */ +SYM_FUNC_START_LOCAL(startup32_set_idt_entry) + /* IDT entry address to %ecx */ + leal (%ecx, %edx, 8), %ecx + + /* Build IDT entry, lower 4 bytes */ + movl %eax, %edx + andl $0x0000ffff, %edx # Target code segment offset [15:0] + orl $(__KERNEL32_CS << 16), %edx # Target code segment selector + + /* Store lower 4 bytes to IDT */ + movl %edx, (%ecx) + + /* Build IDT entry, upper 4 bytes */ + movl %eax, %edx + andl $0xffff0000, %edx # Target code segment offset [31:16] + orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate + + /* Store upper 4 bytes to IDT */ + movl %edx, 4(%ecx) + + RET +SYM_FUNC_END(startup32_set_idt_entry) + +SYM_FUNC_START(startup32_load_idt) + push %ebp + push %ebx + + call 1f +1: pop %ebp + + leal (boot32_idt - 1b)(%ebp), %ebx + + /* #VC handler */ + leal (startup32_vc_handler - 1b)(%ebp), %eax + movl $X86_TRAP_VC, %edx + movl %ebx, %ecx + call startup32_set_idt_entry + + /* Load IDT */ + leal (boot32_idt_desc - 1b)(%ebp), %ecx + movl %ebx, 2(%ecx) + lidt (%ecx) + + pop %ebx + pop %ebp + RET +SYM_FUNC_END(startup32_load_idt) + +/* + * Check for the correct C-bit position when the startup_32 boot-path is used. + * + * The check makes use of the fact that all memory is encrypted when paging is + * disabled. The function creates 64 bits of random data using the RDRAND + * instruction. RDRAND is mandatory for SEV guests, so always available. If the + * hypervisor violates that the kernel will crash right here. + * + * The 64 bits of random data are stored to a memory location and at the same + * time kept in the %eax and %ebx registers. Since encryption is always active + * when paging is off the random data will be stored encrypted in main memory. + * + * Then paging is enabled. When the C-bit position is correct all memory is + * still mapped encrypted and comparing the register values with memory will + * succeed. An incorrect C-bit position will map all memory unencrypted, so that + * the compare will use the encrypted random data and fail. + */ +SYM_FUNC_START(startup32_check_sev_cbit) + pushl %ebx + pushl %ebp + + call 0f +0: popl %ebp + + /* Check for non-zero sev_status */ + movl (sev_status - 0b)(%ebp), %eax + testl %eax, %eax + jz 4f + + /* + * Get two 32-bit random values - Don't bail out if RDRAND fails + * because it is better to prevent forward progress if no random value + * can be gathered. + */ +1: rdrand %eax + jnc 1b +2: rdrand %ebx + jnc 2b + + /* Store to memory and keep it in the registers */ + leal (sev_check_data - 0b)(%ebp), %ebp + movl %eax, 0(%ebp) + movl %ebx, 4(%ebp) + + /* Enable paging to see if encryption is active */ + movl %cr0, %edx /* Backup %cr0 in %edx */ + movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */ + movl %ecx, %cr0 + + cmpl %eax, 0(%ebp) + jne 3f + cmpl %ebx, 4(%ebp) + jne 3f + + movl %edx, %cr0 /* Restore previous %cr0 */ + + jmp 4f + +3: /* Check failed - hlt the machine */ + hlt + jmp 3b + +4: + popl %ebp + popl %ebx + RET +SYM_FUNC_END(startup32_check_sev_cbit) + .code64 #include "../../kernel/sev_verify_cbit.S" .data -#ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 SYM_DATA(sme_me_mask, .quad 0) SYM_DATA(sev_status, .quad 0) SYM_DATA(sev_check_data, .quad 0) -#endif + +SYM_DATA_START_LOCAL(boot32_idt) + .rept 32 + .quad 0 + .endr +SYM_DATA_END(boot32_idt) + +SYM_DATA_START_LOCAL(boot32_idt_desc) + .word . - boot32_idt - 1 + .long 0 +SYM_DATA_END(boot32_idt_desc) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cf690d871..8ae7893d7 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -46,7 +46,7 @@ void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *boot_params; +struct boot_params *boot_params_ptr; struct port_io_ops pio_ops; @@ -132,8 +132,8 @@ void __putstr(const char *s) if (lines == 0 || cols == 0) return; - x = boot_params->screen_info.orig_x; - y = boot_params->screen_info.orig_y; + x = boot_params_ptr->screen_info.orig_x; + y = boot_params_ptr->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -154,8 +154,8 @@ void __putstr(const char *s) } } - boot_params->screen_info.orig_x = x; - boot_params->screen_info.orig_y = y; + boot_params_ptr->screen_info.orig_x = x; + boot_params_ptr->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len, { } #endif -static void parse_elf(void *output) +static size_t parse_elf(void *output) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -293,10 +293,8 @@ static void parse_elf(void *output) if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG2] != ELFMAG2 || - ehdr.e_ident[EI_MAG3] != ELFMAG3) { + ehdr.e_ident[EI_MAG3] != ELFMAG3) error("Kernel is not a valid ELF file"); - return; - } debug_putstr("Parsing ELF... "); @@ -328,6 +326,35 @@ static void parse_elf(void *output) } free(phdrs); + + return ehdr.e_entry - LOAD_PHYSICAL_ADDR; +} + +const unsigned long kernel_total_size = VO__end - VO__text; + +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; } /* @@ -347,25 +374,22 @@ static void parse_elf(void *output) * |-------uncompressed kernel image---------| * */ -asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output, - unsigned long output_len) +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; unsigned long needed_size; + size_t entry_offset; /* Retain x86 boot parameters pointer passed from startup_32/64. */ - boot_params = rmode; + boot_params_ptr = rmode; /* Clear flags intended for solely in-kernel use. */ - boot_params->hdr.loadflags &= ~KASLR_FLAG; + boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(boot_params); + sanitize_boot_params(boot_params_ptr); - if (boot_params->screen_info.orig_video_mode == 7) { + if (boot_params_ptr->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -373,8 +397,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = boot_params->screen_info.orig_video_lines; - cols = boot_params->screen_info.orig_video_cols; + lines = boot_params_ptr->screen_info.orig_video_lines; + cols = boot_params_ptr->screen_info.orig_video_cols; init_default_io_ops(); @@ -393,7 +417,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * so that early debugging output from the RSDP parsing code can be * collected. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + boot_params_ptr->acpi_rsdp_addr = get_rsdp_addr(); debug_putstr("early console in extract_kernel\n"); @@ -411,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entries. This ensures the full mapped area is usable RAM * and doesn't include any reserved areas. */ - needed_size = max(output_len, kernel_total_size); + needed_size = max_t(unsigned long, output_len, kernel_total_size); #ifdef CONFIG_X86_64 needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); #endif @@ -442,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); - if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) @@ -454,16 +478,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); - __decompress(input_data, input_len, NULL, NULL, output, output_len, - NULL, error); - parse_elf(output); - handle_relocations(output, output_len, virt_addr); - debug_putstr("done.\nBooting the kernel.\n"); + + entry_offset = decompress_kernel(output, virt_addr, error); + + debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); + debug_puthex(entry_offset); + debug_putstr(").\n"); /* Disable exception handling before booting the kernel */ cleanup_exception_handling(); - return output; + return output + entry_offset; } void fortify_panic(const char *name) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 20118fb7c..254acd76e 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -52,7 +52,6 @@ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; void *malloc(int size); void free(void *where); -extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) @@ -170,9 +169,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #endif /* ident_map_64.c */ -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; -#endif extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ @@ -190,6 +187,7 @@ static inline void cleanup_exception_handling(void) { } /* IDT Entry Points */ void boot_page_fault(void); +void boot_nmi_trap(void); void boot_stage1_vc(void); void boot_stage2_vc(void); diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index cc9b2529a..6d595abe0 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -3,18 +3,16 @@ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) -#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 - #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 - -#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #ifndef __ASSEMBLER__ extern unsigned long *trampoline_32bit; -extern void trampoline_32bit_src(void *return_ptr); +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); + +extern const u16 trampoline_ljmp_imm_offset; #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2ac12ff41..51f957b24 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; unsigned int __section(".data") ptrs_per_p4d = 1; #endif -struct paging_config { - unsigned long trampoline_start; - unsigned long l5_required; -}; - /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; @@ -29,11 +24,10 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * purposes. * * Avoid putting the pointer into .bss as it will be cleared between - * paging_prepare() and extract_kernel(). + * configure_5level_paging() and extract_kernel(). */ unsigned long *trampoline_32bit __section(".data"); -extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) @@ -54,7 +48,7 @@ static unsigned long find_trampoline_placement(void) * * Only look for values in the legacy ROM for non-EFI system. */ - signature = (char *)&boot_params->efi_info.efi_loader_signature; + signature = (char *)&boot_params_ptr->efi_info.efi_loader_signature; if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { ebda_start = *(unsigned short *)0x40e << 4; @@ -70,10 +64,10 @@ static unsigned long find_trampoline_placement(void) bios_start = round_down(bios_start, PAGE_SIZE); /* Find the first usable memory region under bios_start. */ - for (i = boot_params->e820_entries - 1; i >= 0; i--) { + for (i = boot_params_ptr->e820_entries - 1; i >= 0; i--) { unsigned long new = bios_start; - entry = &boot_params->e820_table[i]; + entry = &boot_params_ptr->e820_table[i]; /* Skip all entries above bios_start. */ if (bios_start <= entry->addr) @@ -106,12 +100,13 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -struct paging_config paging_prepare(void *rmode) +asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) { - struct paging_config paging_config = {}; + void (*toggle_la57)(void *cr3); + bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = rmode; + boot_params_ptr = bp; /* * Check if LA57 is desired and supported. @@ -129,12 +124,22 @@ struct paging_config paging_prepare(void *rmode) !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { - paging_config.l5_required = 1; + l5_required = true; + + /* Initialize variables for 5-level paging */ + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } - paging_config.trampoline_start = find_trampoline_placement(); + /* + * The trampoline will not be used if the paging mode is already set to + * the desired one. + */ + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + return; - trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); @@ -143,32 +148,32 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + toggle_la57 = memcpy(trampoline_32bit + + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); /* + * Avoid the need for a stack in the 32-bit trampoline code, by using + * LJMP rather than LRET to return back to long mode. LJMP takes an + * immediate absolute address, which needs to be adjusted based on the + * placement of the trampoline. + */ + *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += + (unsigned long)toggle_la57; + + /* * The code below prepares page table in trampoline memory. * * The new page table will be used by trampoline code for switching * from 4- to 5-level paging or vice versa. - * - * If switching is not required, the page table is unused: trampoline - * code wouldn't touch CR3. - */ - - /* - * We are not going to use the page table in trampoline memory if we - * are already in the desired paging mode. */ - if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) - goto out; - if (paging_config.l5_required) { + if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; } else { unsigned long src; @@ -181,38 +186,17 @@ struct paging_config paging_prepare(void *rmode) * may be above 4G. */ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), - (void *)src, PAGE_SIZE); + memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); } -out: - return paging_config; -} - -void cleanup_trampoline(void *pgtable) -{ - void *trampoline_pgtable; - - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + toggle_la57(trampoline_32bit); /* - * Move the top level page table out of trampoline memory, - * if it's there. + * Move the top level page table out of trampoline memory. */ - if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)pgtable); - } + memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); - - /* Initialize variables for 5-level paging */ -#ifdef CONFIG_X86_5LEVEL - if (__read_cr4() & X86_CR4_LA57) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - } -#endif } diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 9c91cc40f..d07e665bb 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -327,20 +327,25 @@ static void enforce_vmpl0(void) */ #define SNP_FEATURES_PRESENT (0) +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} + void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -350,35 +355,22 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; - struct msr m; - bool snp; - - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 to ensure that uninitialized values from - * buggy bootloaders aren't propagated. - */ - if (bp) - bp->cc_blob_address = 0; - - /* - * Do an initial SEV capability check before snp_init() which - * loads the CPUID page and the same checks afterwards are done - * without the hypervisor and are trustworthy. - * - * If the HV fakes SEV support, the guest will crash'n'burn - * which is good enough. - */ /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) - return; + return -ENODEV; /* * Check for the SME/SEV feature: @@ -393,6 +385,35 @@ void sev_enable(struct boot_params *bp) native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ + struct msr m; + int bitpos; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + if (sev_check_cpu_support() < 0) return; /* @@ -403,26 +424,8 @@ void sev_enable(struct boot_params *bp) /* Now repeat the checks with the SNP CPUID table. */ - /* Recheck the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - - /* - * Recheck for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -454,7 +457,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ @@ -545,7 +565,7 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) * accessed after switchover. */ if (sev_snp_enabled()) { - unsigned long cc_info_pa = boot_params->cc_blob_address; + unsigned long cc_info_pa = boot_params_ptr->cc_blob_address; struct cc_blob_sev_info *cc_info; kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f912d7770..d31982509 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -406,7 +406,7 @@ xloadflags: # define XLF1 0 #endif -#ifdef CONFIG_EFI_STUB +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL # ifdef CONFIG_EFI_MIXED # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) # else diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index a3725ad46..bd247692b 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -290,6 +290,7 @@ static void efi_stub_entry_update(void) { unsigned long addr = efi32_stub_entry; +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ addr = efi64_stub_entry - 0x200; @@ -299,6 +300,7 @@ static void efi_stub_entry_update(void) if (efi32_stub_entry != addr) die("32-bit and 64-bit EFI entry points do not match\n"); #endif +#endif put_unaligned_le32(addr, &buf[0x264]); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb36..09e99d13f 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include <linux/linkage.h> #include <asm/export.h> #include <asm/msr-index.h> +#include <asm/unwind_hints.h> +#include <asm/segment.h> +#include <asm/cache.h> .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e7156..ee5def106 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -981,6 +982,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d..c2383c288 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -223,6 +223,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi swapgs + CLEAR_CPU_BUFFERS jmp .Lnative_iret @@ -767,6 +769,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1494,6 +1498,12 @@ nmi_restore: movq $0, 5*8(%rsp) /* clear "NMI executing" */ /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this * cannot result in a fault. Similarly, we don't need to worry @@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(ignore_sysret) #endif diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d6c08d898..4bcd009a2 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 215d37f7d..a38cc0afc 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -79,4 +79,14 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); + +extern struct boot_params *boot_params_ptr; +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d3..f835b328b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b12270879..b97a70aa4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -304,7 +304,7 @@ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ - +#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ @@ -477,4 +477,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 233ae6986..e601264b1 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -88,6 +88,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -101,8 +103,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -214,6 +215,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) @@ -325,6 +328,13 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \ (__efi64_split(phys), __efi64_split(size), __efi64_split(flags)) +/* Memory Attribute Protocol */ +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 11203a9fe..ffe72790c 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 800ffce0d..6b4d36c95 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -9,7 +9,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } -extern void arch_irq_work_raise(void); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23..cbbef3251 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ l_yes: static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2..d91b37f5b 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec955ab2f..005e41dc7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -168,6 +168,14 @@ * CPU is not vulnerable to Gather * Data Sampling (GDS). */ +#define ARCH_CAP_RFDS_NO BIT(27) /* + * Not susceptible to Register + * File Data Sampling. + */ +#define ARCH_CAP_RFDS_CLEAR BIT(28) /* + * VERW clears CPU Register + * File. + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2f123d4fb..8f6f17a86 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,19 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF + verw _ASM_RIP(mds_verw_sel) +.Lskip_verw_\@: +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -222,6 +235,8 @@ extern void srso_alias_untrain_ret(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +extern void (*x86_return_thunk)(void); + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ @@ -366,13 +381,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include <asm/segment.h> /** @@ -399,17 +415,6 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - -/** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * * Clear CPU buffers if the corresponding static key is enabled diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa611216..1919ccf49 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -18,7 +18,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7ca5c9ec8..cf98fc286 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -157,6 +157,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -202,12 +203,15 @@ void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } @@ -225,6 +229,9 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } + +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 6ca0c661c..c638535ee 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -155,7 +155,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -317,7 +317,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -397,7 +397,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -416,7 +416,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -499,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 6c2e3ff3c..724ce4480 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -43,9 +43,9 @@ static inline int cpu_has_vmx(void) */ static inline int cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); + asm goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); cr4_clear_bits(X86_CR4_VMXE); return 0; @@ -129,9 +129,9 @@ static inline void cpu_svm_disable(void) * case, GIF must already be set, otherwise the NMI would have * been blocked, so just eat the fault. */ - asm_volatile_goto("1: stgi\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "memory" : fault); + asm goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); fault: wrmsrl(MSR_EFER, efer & ~EFER_SVME); } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6b8c93989..69f85e274 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -536,6 +536,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -551,14 +552,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 13dffc43d..c68789fdc 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -423,7 +420,14 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + /* + * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based + * mitigations, disable KVM-only mitigation in that case. + */ + if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + static_branch_disable(&mmio_stale_data_clear); else static_branch_enable(&mmio_stale_data_clear); @@ -476,6 +480,57 @@ static int __init mmio_stale_data_parse_cmdline(char *str) early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "Register File Data Sampling: " fmt + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; + +static const char * const rfds_strings[] = { + [RFDS_MITIGATION_OFF] = "Vulnerable", + [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", + [RFDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", +}; + +static void __init rfds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RFDS) || cpu_mitigations_off()) { + rfds_mitigation = RFDS_MITIGATION_OFF; + return; + } + if (rfds_mitigation == RFDS_MITIGATION_OFF) + return; + + if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; +} + +static __init int rfds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + return 0; + + if (!strcmp(str, "off")) + rfds_mitigation = RFDS_MITIGATION_OFF; + else if (!strcmp(str, "on")) + rfds_mitigation = RFDS_MITIGATION_VERW; + + return 0; +} +early_param("reg_file_data_sampling", rfds_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "" fmt static void __init md_clear_update_mitigation(void) @@ -483,12 +538,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { @@ -500,11 +555,19 @@ static void __init md_clear_update_mitigation(void) taa_mitigation = TAA_MITIGATION_VERW; taa_select_mitigation(); } - if (mmio_mitigation == MMIO_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + /* + * MMIO_MITIGATION_OFF is not checked here so that mmio_stale_data_clear + * gets updated correctly as per X86_FEATURE_CLEAR_CPU_BUF state. + */ + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { mmio_mitigation = MMIO_MITIGATION_VERW; mmio_select_mitigation(); } + if (rfds_mitigation == RFDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_RFDS)) { + rfds_mitigation = RFDS_MITIGATION_VERW; + rfds_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); @@ -514,6 +577,8 @@ out: pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) pr_info("MMIO Stale Data: Unknown: No mitigations\n"); + if (boot_cpu_has_bug(X86_BUG_RFDS)) + pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } static void __init md_clear_select_mitigation(void) @@ -521,11 +586,12 @@ static void __init md_clear_select_mitigation(void) mds_select_mitigation(); taa_select_mitigation(); mmio_select_mitigation(); + rfds_select_mitigation(); /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. + * As these mitigations are inter-related and rely on VERW instruction + * to clear the microarchitural buffers, update and print their status + * after mitigation selection is done for each of these vulnerabilities. */ md_clear_update_mitigation(); } @@ -2589,6 +2655,11 @@ static ssize_t mmio_stale_data_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t rfds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -2750,6 +2821,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRSO: return srso_show_state(buf); + case X86_BUG_RFDS: + return rfds_show_state(buf); + default: break; } @@ -2824,4 +2898,9 @@ ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribut { return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); } + +ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 454cdf341..758938c94 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1248,6 +1248,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRSO BIT(5) /* CPU is affected by GDS */ #define GDS BIT(6) +/* CPU is affected by Register File Data Sampling */ +#define RFDS BIT(7) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1275,9 +1277,18 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_N, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), @@ -1311,6 +1322,24 @@ static bool arch_cap_mmio_immune(u64 ia32_cap) ia32_cap & ARCH_CAP_SBDR_SSDP_NO); } +static bool __init vulnerable_to_rfds(u64 ia32_cap) +{ + /* The "immunity" bit trumps everything else: */ + if (ia32_cap & ARCH_CAP_RFDS_NO) + return false; + + /* + * VMMs set ARCH_CAP_RFDS_CLEAR for processors not in the blacklist to + * indicate that mitigation is needed because guest is running on a + * vulnerable hardware or may migrate to such hardware: + */ + if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + return true; + + /* Only consult the blacklist when there is no enumeration: */ + return cpu_matches(cpu_vuln_blacklist, RFDS); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1419,6 +1448,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SRSO); } + if (vulnerable_to_rfds(ia32_cap)) + setup_force_cpu_bug(X86_BUG_RFDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 427899650..32bd64017 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -216,6 +216,90 @@ int intel_cpu_collect_info(struct ucode_cpu_info *uci) } EXPORT_SYMBOL_GPL(intel_cpu_collect_info); +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -367,6 +451,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -527,90 +618,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -747,9 +754,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index f1a748da5..cad6ea191 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -44,6 +44,7 @@ #include <linux/sync_core.h> #include <linux/task_work.h> #include <linux/hardirq.h> +#include <linux/kexec.h> #include <asm/intel-family.h> #include <asm/processor.h> @@ -239,6 +240,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) struct llist_node *pending; struct mce_evt_llist *l; int apei_err = 0; + struct page *p; /* * Allow instrumentation around external facilities usage. Not that it @@ -292,6 +294,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mca_cfg.panic_timeout; + + /* + * Kdump skips the poisoned page in order to avoid + * touching the error bits again. Poison the page even + * if the error is fatal and the machine is about to + * panic. + */ + if (kexec_crash_loaded()) { + if (final && (final->status & MCI_STATUS_ADDRV)) { + p = pfn_to_online_page(final->addr >> PAGE_SHIFT); + if (p) + SetPageHWPoison(p); + } + } panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9dac24680..993734e96 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbd..247f2225a 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ retry: if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e07234ec7..ec51ce713 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -361,7 +361,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index cec0bfa3b..ed6cce6c3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -522,9 +522,6 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 3fbb49168..b32134b09 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -80,7 +80,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a..4e725854c 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9a75a0d5d..220cdbe1e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -38,7 +38,7 @@ static int fixed_pmc_events[] = {1, 0, 7}; static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc..6a9bfdfbb 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4..0b2cad66d 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb $VMX_RUN_VMRESUME, %bl + bt $VMX_RUN_VMRESUME_SHIFT, %bx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'testb' above */ - jz .Lvmlaunch + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 98d732b94..5c1590855 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + vmx_fb_clear_ctrl_available; /* * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS @@ -2469,10 +2470,10 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); + asm goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); return 0; fault: @@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, { guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index ec268df83..5edab28df 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -73,7 +73,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -166,7 +166,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -183,7 +183,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7144e5166..688bc7b72 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1613,7 +1613,8 @@ static unsigned int num_msr_based_features; ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) static u64 kvm_get_arch_capabilities(void) { @@ -1650,6 +1651,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; + if (!boot_cpu_has_bug(X86_BUG_RFDS)) + data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f..f50cc210a 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { - pud_t pudval; + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; - if (pud_present(*pud)) - continue; + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { + pud_t pudval; - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index aa39d678f..dae5c9527 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; @@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69aee624..7913440c0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_SLS)) diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf850..46c859625 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/block/bio.c b/block/bio.c index 6c22dd7b6..74c2818c7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -927,7 +927,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio, if ((addr1 | mask) != (addr2 | mask)) return false; - if (bv->bv_len + len > queue_max_segment_size(q)) + if (len > queue_max_segment_size(q) - bv->bv_len) return false; return __bio_try_merge_page(bio, page, len, offset, same_page); } diff --git a/block/blk-core.c b/block/blk-core.c index 6eaf2b0ad..aefdf07bd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -864,7 +864,16 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) */ blk_flush_plug(current->plug, false); - if (bio_queue_enter(bio)) + /* + * We need to be able to enter a frozen queue, similar to how + * timeouts also need to do that. If that is blocked, then we can + * have pending IO when a queue freeze is started, and then the + * wait for the freeze to finish will wait for polled requests to + * timeout as the poller is preventer from entering the queue and + * completing them. As long as we prevent new IO from being queued, + * that should be all that matters. + */ + if (!percpu_ref_tryget(&q->q_usage_counter)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7dd6a33e1..e6557024e 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1337,6 +1337,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) lockdep_assert_held(&iocg->waitq.lock); + /* + * If the delay is set by another CPU, we may be in the past. No need to + * change anything if so. This avoids decay calculation underflow. + */ + if (time_before64(now->now, iocg->delay_at)) + return false; + /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; if (iocg->delay) diff --git a/block/blk-map.c b/block/blk-map.c index 66da9e2b1..b337ae347 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -203,12 +203,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index b3f99dda4..7ed6b9469 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -747,11 +747,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. + * For such case, force the completed nbytes to be equal to + * the BIO size so that bio_advance() sets the BIO remaining + * size to 0 and we end up calling bio_endio() before returning. */ - if (bio->bi_iter.bi_size != nbytes) + if (bio->bi_iter.bi_size != nbytes) { bio->bi_status = BLK_STS_IOERR; - else + nbytes = bio->bi_iter.bi_size; + } else { bio->bi_iter.bi_sector = rq->__sector; + } } bio_advance(bio, nbytes); @@ -1860,6 +1865,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, __add_wait_queue(wq, wait); /* + * Add one explicit barrier since blk_mq_get_driver_tag() may + * not imply barrier in case of failure. + * + * Order adding us to wait queue and allocating driver tag. + * + * The pair is the one implied in sbitmap_queue_wake_up() which + * orders clearing sbitmap tag bits and waitqueue_active() in + * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless + * + * Otherwise, re-order of adding wait queue and getting driver tag + * may cause __sbitmap_queue_wake_up() to wake up nothing because + * the waitqueue_active() may not observe us in wait queue. + */ + smp_mb(); + + /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait * queue. diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 088db2356..0a84d5afd 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -308,9 +308,10 @@ err: static void __exit extlog_exit(void) { mce_unregister_decode_chain(&extlog_mce_dec); - ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; - if (extlog_l1_addr) + if (extlog_l1_addr) { + ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; acpi_os_unmap_iomem(extlog_l1_addr, l1_size); + } if (elog_addr) acpi_os_unmap_iomem(elog_addr, elog_size); release_mem_region(elog_base, elog_size); diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index f7852fb75..756ab8edd 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -513,6 +513,15 @@ static const struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3350"), }, }, + { + .callback = video_set_report_key_events, + .driver_data = (void *)((uintptr_t)REPORT_BRIGHTNESS_KEY_EVENTS), + .ident = "COLORFUL X15 AT 23", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "COLORFUL"), + DMI_MATCH(DMI_PRODUCT_NAME, "X15 AT 23"), + }, + }, /* * Some machines change the brightness themselves when a brightness * hotkey gets pressed, despite us telling them not to. In this case diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9952f3a79..dd808cf65 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -100,6 +100,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) } /* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + +/* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain * compatible with existing boot arg use cases. @@ -461,7 +475,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) } static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) + int sev, bool sync) { int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); @@ -475,7 +489,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0; if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags); @@ -483,9 +497,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, return false; } -static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? MF_ACTION_REQUIRED : 0; bool queued = false; int sec_sev, i; char *p; @@ -510,7 +526,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s * and don't filter out 'corrected' error here. */ if (is_cache && has_pa) { - queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); p += err_info->length; continue; } @@ -631,6 +647,7 @@ static bool ghes_do_proc(struct ghes *ghes, const guid_t *fru_id = &guid_null; char *fru_text = ""; bool queued = false; + bool sync = is_hest_sync_notify(ghes); sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -648,13 +665,13 @@ static bool ghes_do_proc(struct ghes *ghes, ghes_edac_report_mem_error(sev, mem_err); arch_apei_report_mem_error(sev, mem_err); - queued = ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - queued = ghes_handle_arm_hw_error(gdata, sev); + queued = ghes_handle_arm_hw_error(gdata, sev, sync); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 12f330b0e..b57de78fb 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -183,7 +183,7 @@ static int __init slit_valid(struct acpi_table_slit *slit) int i, j; int d = slit->locality_count; for (i = 0; i < d; i++) { - for (j = 0; j < d; j++) { + for (j = 0; j < d; j++) { u8 val = slit->entry[d*i + j]; if (i == j) { if (val != LOCAL_DISTANCE) @@ -532,7 +532,7 @@ int __init acpi_numa_init(void) */ /* fake_pxm is the next unused PXM value after SRAT parsing */ - for (i = 0, fake_pxm = -1; i < MAX_NUMNODES - 1; i++) { + for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) { if (node_to_pxm_map[i] > fake_pxm) fake_pxm = node_to_pxm_map[i]; } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d933ef6cc..55cd17a13 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -477,6 +477,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 805645efb..1790a2ecb 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -49,6 +49,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -129,6 +130,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,14 +605,19 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + /* ASMedia */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ + { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ + { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ + { PCI_VDEVICE(ASMEDIA, 0x1165), board_ahci }, /* ASM1165 */ + { PCI_VDEVICE(ASMEDIA, 0x1166), board_ahci }, /* ASM1166 */ /* * Samsung SSDs found on some macbooks. NCQ times out if MSI is @@ -658,6 +671,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; @@ -944,11 +962,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1921,7 +1948,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ff8e6ae1c..f9c5906a8 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index cb24ecf36..50e07ea60 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa2fc1953..f14e56a5c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2005,6 +2005,10 @@ void ata_dev_power_set_active(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only * if supported by the device. diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 49cb45373..2daf50d4c 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card) vc->scq = alloc_scq(card, vc->class); if (!vc->scq) { printk("%s: can't get SCQ.\n", card->name); + kfree(card->vcs[0]); + card->vcs[0] = NULL; return -ENOMEM; } diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567..5b59d133b 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static int __init early_cpu_to_node(int cpu) +int __init early_cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } diff --git a/drivers/base/core.c b/drivers/base/core.c index af90bfb0c..3078f44dc 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -337,10 +337,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** @@ -2054,9 +2056,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index dab70a653..31da94afe 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -589,6 +589,12 @@ ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -602,6 +608,7 @@ static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); +static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -617,6 +624,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_spec_rstack_overflow.attr, + &dev_attr_reg_file_data_sampling.attr, NULL }; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 56ceba469..d238b47f7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1052,7 +1052,7 @@ static int __init genpd_power_off_unused(void) return 0; } -late_initcall(genpd_power_off_unused); +late_initcall_sync(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 128722cf6..827802e41 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 2cfed2e58..ad451224e 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -587,6 +587,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess, { char *full_path; char *a, *b; + int len; full_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!full_path) @@ -598,19 +599,19 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess, */ a = strnstr(dev_search_path, "%SESSNAME%", sizeof(dev_search_path)); if (a) { - int len = a - dev_search_path; + len = a - dev_search_path; len = snprintf(full_path, PATH_MAX, "%.*s/%s/%s", len, dev_search_path, srv_sess->sessname, dev_name); - if (len >= PATH_MAX) { - pr_err("Too long path: %s, %s, %s\n", - dev_search_path, srv_sess->sessname, dev_name); - kfree(full_path); - return ERR_PTR(-EINVAL); - } } else { - snprintf(full_path, PATH_MAX, "%s/%s", - dev_search_path, dev_name); + len = snprintf(full_path, PATH_MAX, "%s/%s", + dev_search_path, dev_name); + } + if (len >= PATH_MAX) { + pr_err("Too long path: %s, %s, %s\n", + dev_search_path, srv_sess->sessname, dev_name); + kfree(full_path); + return ERR_PTR(-EINVAL); } /* eliminitate duplicated slashes */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3124837aa..505026f00 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1206,14 +1206,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1231,7 +1232,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c9064d34d..0211f704a 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -152,7 +152,7 @@ static int qca_send_patch_config_cmd(struct hci_dev *hdev) bt_dev_dbg(hdev, "QCA Patch config"); skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, sizeof(cmd), - cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT); + cmd, 0, HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { err = PTR_ERR(skb); bt_dev_err(hdev, "Sending QCA Patch config failed (%d)", err); @@ -594,27 +594,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Firmware files to download are based on ROM version. * ROM version is derived from last two bytes of soc_ver. */ - rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); + if (soc_type == QCA_WCN3988) + rom_ver = ((soc_ver & 0x00000f00) >> 0x05) | (soc_ver & 0x0000000f); + else + rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f); if (soc_type == QCA_WCN6750) qca_send_patch_config_cmd(hdev); /* Download rampatch file */ config.type = TLV_TYPE_PATCH; - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: snprintf(config.fwname, sizeof(config.fwname), "qca/crbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_QCA6390) { + break; + case QCA_WCN3988: + snprintf(config.fwname, sizeof(config.fwname), + "qca/apbtfw%02x.tlv", rom_ver); + break; + case QCA_QCA6390: snprintf(config.fwname, sizeof(config.fwname), "qca/htbtfw%02x.tlv", rom_ver); - } else if (soc_type == QCA_WCN6750) { + break; + case QCA_WCN6750: /* Choose mbn file by default.If mbn file is not found * then choose tlv file */ config.type = ELF_TYPE_PATCH; snprintf(config.fwname, sizeof(config.fwname), "qca/msbtfw%02x.mbn", rom_ver); - } else { + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); + break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtbtfw%02x.tlv", rom_ver); + break; + default: snprintf(config.fwname, sizeof(config.fwname), "qca/rampatch_%08x.bin", soc_ver); } @@ -630,27 +651,48 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, /* Download NVM configuration */ config.type = TLV_TYPE_NVM; - if (firmware_name) + if (firmware_name) { snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name); - else if (qca_is_wcn399x(soc_type)) { - if (ver.soc_id == QCA_WCN3991_SOC_ID) { + } else { + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + if (le32_to_cpu(ver.soc_id) == QCA_WCN3991_SOC_ID) { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02xu.bin", rom_ver); + } else { + snprintf(config.fwname, sizeof(config.fwname), + "qca/crnv%02x.bin", rom_ver); + } + break; + case QCA_WCN3988: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02xu.bin", rom_ver); - } else { + "qca/apnv%02x.bin", rom_ver); + break; + case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htnv%02x.bin", rom_ver); + break; + case QCA_WCN6750: snprintf(config.fwname, sizeof(config.fwname), - "qca/crnv%02x.bin", rom_ver); + "qca/msnv%02x.bin", rom_ver); + break; + case QCA_WCN6855: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpnv%02x.bin", rom_ver); + break; + case QCA_WCN7850: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hmtnv%02x.bin", rom_ver); + break; + + default: + snprintf(config.fwname, sizeof(config.fwname), + "qca/nvm_%08x.bin", soc_ver); } } - else if (soc_type == QCA_QCA6390) - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - else if (soc_type == QCA_WCN6750) - snprintf(config.fwname, sizeof(config.fwname), - "qca/msnv%02x.bin", rom_ver); - else - snprintf(config.fwname, sizeof(config.fwname), - "qca/nvm_%08x.bin", soc_ver); err = qca_download_firmware(hdev, &config, soc_type, rom_ver); if (err < 0) { @@ -658,16 +700,25 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type >= QCA_WCN3991) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_QCA6390: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: err = qca_disable_soc_logging(hdev); if (err < 0) return err; + break; + default: + break; } /* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the * VsMsftOpCode. */ switch (soc_type) { + case QCA_WCN3988: case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: @@ -685,11 +736,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } - if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) { + switch (soc_type) { + case QCA_WCN3991: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: /* get fw build info */ err = qca_read_fw_build_info(hdev); if (err < 0) return err; + break; + default: + break; } bt_dev_info(hdev, "QCA setup on UART is completed"); diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 61e9a50e6..03bff5c00 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -142,11 +142,14 @@ enum qca_btsoc_type { QCA_INVALID = -1, QCA_AR3002, QCA_ROME, + QCA_WCN3988, QCA_WCN3990, QCA_WCN3998, QCA_WCN3991, QCA_QCA6390, QCA_WCN6750, + QCA_WCN6855, + QCA_WCN7850, }; #if IS_ENABLED(CONFIG_BT_QCA) @@ -159,16 +162,6 @@ int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 || - soc_type == QCA_WCN3998; -} -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return soc_type == QCA_WCN6750; -} - #else static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) @@ -196,16 +189,6 @@ static inline int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } -static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) -{ - return false; -} - -static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type) -{ - return false; -} - static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 45dffd2cb..8bfef7f81 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -7,6 +7,7 @@ * * Copyright (C) 2007 Texas Instruments, Inc. * Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved. + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * * Acknowledgements: * This file is based on hci_ll.c, which was... @@ -606,9 +607,18 @@ static int qca_open(struct hci_uart *hu) if (hu->serdev) { qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: hu->init_speed = qcadev->init_speed; + break; + + default: + break; + } if (qcadev->oper_speed) hu->oper_speed = qcadev->oper_speed; @@ -1314,11 +1324,20 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); /* Give the controller time to process the request */ - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: usleep_range(1000, 10000); - else + break; + + default: msleep(300); + } return 0; } @@ -1391,12 +1410,20 @@ static unsigned int qca_get_speed(struct hci_uart *hu, static int qca_check_speeds(struct hci_uart *hu) { - if (qca_is_wcn399x(qca_soc_type(hu)) || - qca_is_wcn6750(qca_soc_type(hu))) { + switch (qca_soc_type(hu)) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: if (!qca_get_speed(hu, QCA_INIT_SPEED) && !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; - } else { + break; + + default: if (!qca_get_speed(hu, QCA_INIT_SPEED) || !qca_get_speed(hu, QCA_OPER_SPEED)) return -EINVAL; @@ -1425,13 +1452,29 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) /* Disable flow control for wcn3990 to deassert RTS while * changing the baudrate of chip and host. */ - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, true); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: reinit_completion(&qca->drop_ev_comp); set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } qca_baudrate = qca_get_baudrate_value(speed); @@ -1443,11 +1486,23 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type) host_set_baudrate(hu, speed); error: - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: hci_uart_set_flow_control(hu, false); + break; - if (soc_type == QCA_WCN3990) { + default: + break; + } + + switch (soc_type) { + case QCA_WCN3990: /* Wait for the controller to send the vendor event * for the baudrate change command. */ @@ -1459,6 +1514,10 @@ error: } clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags); + break; + + default: + break; } } @@ -1620,12 +1679,20 @@ static int qca_regulator_init(struct hci_uart *hu) } } - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: /* Forcefully enable wcn399x to enter in to boot mode. */ host_set_baudrate(hu, 2400); ret = qca_send_power_pulse(hu, false); if (ret) return ret; + break; + + default: + break; } /* For wcn6750 need to enable gpio bt_en */ @@ -1642,10 +1709,18 @@ static int qca_regulator_init(struct hci_uart *hu) qca_set_speed(hu, QCA_INIT_SPEED); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: ret = qca_send_power_pulse(hu, true); if (ret) return ret; + break; + + default: + break; } /* Now the device is in ready state to communicate with host. @@ -1679,10 +1754,18 @@ static int qca_power_on(struct hci_dev *hdev) if (!hu->serdev) return 0; - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: ret = qca_regulator_init(hu); - } else { + break; + + default: qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bt_en) { gpiod_set_value_cansleep(qcadev->bt_en, 1); @@ -1705,6 +1788,7 @@ static int qca_setup(struct hci_uart *hu) const char *firmware_name = qca_get_firmware_name(hu); int ret; struct qca_btsoc_version ver; + const char *soc_name; ret = qca_check_speeds(hu); if (ret) @@ -1719,9 +1803,30 @@ static int qca_setup(struct hci_uart *hu) */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); - bt_dev_info(hdev, "setting up %s", - qca_is_wcn399x(soc_type) ? "wcn399x" : - (soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390"); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + soc_name = "wcn399x"; + break; + + case QCA_WCN6750: + soc_name = "wcn6750"; + break; + + case QCA_WCN6855: + soc_name = "wcn6855"; + break; + + case QCA_WCN7850: + soc_name = "wcn7850"; + break; + + default: + soc_name = "ROME/QCA6390"; + } + bt_dev_info(hdev, "setting up %s", soc_name); qca->memdump_state = QCA_MEMDUMP_IDLE; @@ -1732,15 +1837,33 @@ retry: clear_bit(QCA_SSR_TRIGGERED, &qca->flags); - if (qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type)) { - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + + /* Set BDA quirk bit for reading BDA value from fwnode property + * only if that property exist in DT. + */ + if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) { + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later"); + } else { + bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA"); + } + hci_set_aosp_capable(hdev); ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) goto out; - } else { + break; + + default: qca_set_speed(hu, QCA_INIT_SPEED); } @@ -1754,8 +1877,17 @@ retry: qca_baudrate = qca_get_baudrate_value(speed); } - if (!(qca_is_wcn399x(soc_type) || - qca_is_wcn6750(soc_type))) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + break; + + default: /* Get QCA version information */ ret = qca_read_soc_version(hdev, &ver, soc_type); if (ret) @@ -1824,7 +1956,18 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; -static const struct qca_device_data qca_soc_data_wcn3990 = { +static const struct qca_device_data qca_soc_data_wcn3988 __maybe_unused = { + .soc_type = QCA_WCN3988, + .vregs = (struct qca_vreg []) { + { "vddio", 15000 }, + { "vddxo", 80000 }, + { "vddrf", 300000 }, + { "vddch0", 450000 }, + }, + .num_vregs = 4, +}; + +static const struct qca_device_data qca_soc_data_wcn3990 __maybe_unused = { .soc_type = QCA_WCN3990, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1835,7 +1978,7 @@ static const struct qca_device_data qca_soc_data_wcn3990 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_wcn3991 = { +static const struct qca_device_data qca_soc_data_wcn3991 __maybe_unused = { .soc_type = QCA_WCN3991, .vregs = (struct qca_vreg []) { { "vddio", 15000 }, @@ -1847,7 +1990,7 @@ static const struct qca_device_data qca_soc_data_wcn3991 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn3998 = { +static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .soc_type = QCA_WCN3998, .vregs = (struct qca_vreg []) { { "vddio", 10000 }, @@ -1858,12 +2001,13 @@ static const struct qca_device_data qca_soc_data_wcn3998 = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca6390 = { +static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { .soc_type = QCA_QCA6390, .num_vregs = 0, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; -static const struct qca_device_data qca_soc_data_wcn6750 = { +static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { { "vddio", 5000 }, @@ -1880,6 +2024,34 @@ static const struct qca_device_data qca_soc_data_wcn6750 = { .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; +static const struct qca_device_data qca_soc_data_wcn6855 = { + .soc_type = QCA_WCN6855, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddbtcxmx", 126000 }, + { "vddrfacmn", 12500 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p7", 302000 }, + { "vddrfa1p2", 257000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + +static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { + .soc_type = QCA_WCN7850, + .vregs = (struct qca_vreg []) { + { "vddio", 5000 }, + { "vddaon", 26000 }, + { "vdddig", 126000 }, + { "vddrfa0p8", 102000 }, + { "vddrfa1p2", 257000 }, + { "vddrfa1p9", 302000 }, + }, + .num_vregs = 6, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, +}; + static void qca_power_shutdown(struct hci_uart *hu) { struct qca_serdev *qcadev; @@ -1905,11 +2077,18 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); - if (qca_is_wcn399x(soc_type)) { + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: host_set_baudrate(hu, 2400); qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); - } else if (soc_type == QCA_WCN6750) { + break; + + case QCA_WCN6750: + case QCA_WCN6855: gpiod_set_value_cansleep(qcadev->bt_en, 0); msleep(100); qca_regulator_disable(qcadev); @@ -1917,7 +2096,9 @@ static void qca_power_shutdown(struct hci_uart *hu) sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); } - } else if (qcadev->bt_en) { + break; + + default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2042,10 +2223,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->oper_speed) BT_DBG("UART will pick default operating speed"); - if (data && - (qca_is_wcn399x(data->soc_type) || - qca_is_wcn6750(data->soc_type))) { + if (data) qcadev->btsoc_type = data->soc_type; + else + qcadev->btsoc_type = QCA_ROME; + + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2064,14 +2254,19 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR_OR_NULL(qcadev->bt_en) && data->soc_type == QCA_WCN6750) { + if (IS_ERR_OR_NULL(qcadev->bt_en) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); power_ctrl_enabled = false; } qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); - if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && data->soc_type == QCA_WCN6750) + if (IS_ERR_OR_NULL(qcadev->sw_ctrl) && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855 || + data->soc_type == QCA_WCN7850)) dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); @@ -2085,12 +2280,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) BT_ERR("wcn3990 serdev registration failed"); return err; } - } else { - if (data) - qcadev->btsoc_type = data->soc_type; - else - qcadev->btsoc_type = QCA_ROME; + break; + default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR_OR_NULL(qcadev->bt_en)) { @@ -2146,12 +2338,24 @@ static void qca_serdev_remove(struct serdev_device *serdev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct qca_power *power = qcadev->bt_power; - if ((qca_is_wcn399x(qcadev->btsoc_type) || - qca_is_wcn6750(qcadev->btsoc_type)) && - power->vregs_on) - qca_power_shutdown(&qcadev->serdev_hu); - else if (qcadev->susclk) - clk_disable_unprepare(qcadev->susclk); + switch (qcadev->btsoc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + case QCA_WCN6750: + case QCA_WCN6855: + case QCA_WCN7850: + if (power->vregs_on) { + qca_power_shutdown(&qcadev->serdev_hu); + break; + } + fallthrough; + + default: + if (qcadev->susclk) + clk_disable_unprepare(qcadev->susclk); + } hci_uart_unregister_device(&qcadev->serdev_hu); } @@ -2328,10 +2532,13 @@ static const struct of_device_id qca_bluetooth_of_match[] = { { .compatible = "qcom,qca6174-bt" }, { .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390}, { .compatible = "qcom,qca9377-bt" }, + { .compatible = "qcom,wcn3988-bt", .data = &qca_soc_data_wcn3988}, { .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990}, { .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991}, { .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998}, { .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750}, + { .compatible = "qcom,wcn6855-bt", .data = &qca_soc_data_wcn6855}, + { .compatible = "qcom,wcn7850-bt", .data = &qca_soc_data_wcn7850}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match); diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 5eb0fe73d..79fc96c8d 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -830,6 +830,12 @@ static void moxtet_remove(struct spi_device *spi) mutex_destroy(&moxtet->lock); } +static const struct spi_device_id moxtet_spi_ids[] = { + { "moxtet" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, moxtet_spi_ids); + static const struct of_device_id moxtet_dt_ids[] = { { .compatible = "cznic,moxtet" }, {}, @@ -841,6 +847,7 @@ static struct spi_driver moxtet_spi_driver = { .name = "moxtet", .of_match_table = moxtet_dt_ids, }, + .id_table = moxtet_spi_ids, .probe = moxtet_probe, .remove = moxtet_remove, }; diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c index a3d04c7c3..eb9c139ba 100644 --- a/drivers/clk/hisilicon/clk-hi3620.c +++ b/drivers/clk/hisilicon/clk-hi3620.c @@ -467,8 +467,10 @@ static void __init hi3620_mmc_clk_init(struct device_node *node) return; clk_data->clks = kcalloc(num, sizeof(*clk_data->clks), GFP_KERNEL); - if (!clk_data->clks) + if (!clk_data->clks) { + kfree(clk_data); return; + } for (i = 0; i < num; i++) { struct hisi_mmc_clock *mmc_clk = &hi3620_mmc_clks[i]; diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index 273de1f29..1066ea16d 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -67,6 +67,22 @@ static const char * const lcd_pxl_sels[] = { "lcd_pxl_bypass_div_clk", }; +static const char *const lvds0_sels[] = { + "clk_dummy", + "clk_dummy", + "clk_dummy", + "clk_dummy", + "mipi0_lvds_bypass_clk", +}; + +static const char *const lvds1_sels[] = { + "clk_dummy", + "clk_dummy", + "clk_dummy", + "clk_dummy", + "mipi1_lvds_bypass_clk", +}; + static const char * const mipi_sels[] = { "clk_dummy", "clk_dummy", @@ -201,9 +217,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) /* MIPI-LVDS SS */ imx_clk_scu("mipi0_bypass_clk", IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_BYPASS); imx_clk_scu("mipi0_pixel_clk", IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_PER); - imx_clk_scu("mipi0_lvds_pixel_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC2); imx_clk_scu("mipi0_lvds_bypass_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_BYPASS); - imx_clk_scu("mipi0_lvds_phy_clk", IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC3); + imx_clk_scu2("mipi0_lvds_pixel_clk", lvds0_sels, ARRAY_SIZE(lvds0_sels), IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC2); + imx_clk_scu2("mipi0_lvds_phy_clk", lvds0_sels, ARRAY_SIZE(lvds0_sels), IMX_SC_R_LVDS_0, IMX_SC_PM_CLK_MISC3); imx_clk_scu2("mipi0_dsi_tx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_MST_BUS); imx_clk_scu2("mipi0_dsi_rx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_SLV_BUS); imx_clk_scu2("mipi0_dsi_phy_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_0, IMX_SC_PM_CLK_PHY); @@ -213,9 +229,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev) imx_clk_scu("mipi1_bypass_clk", IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_BYPASS); imx_clk_scu("mipi1_pixel_clk", IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_PER); - imx_clk_scu("mipi1_lvds_pixel_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC2); imx_clk_scu("mipi1_lvds_bypass_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_BYPASS); - imx_clk_scu("mipi1_lvds_phy_clk", IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC3); + imx_clk_scu2("mipi1_lvds_pixel_clk", lvds1_sels, ARRAY_SIZE(lvds1_sels), IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC2); + imx_clk_scu2("mipi1_lvds_phy_clk", lvds1_sels, ARRAY_SIZE(lvds1_sels), IMX_SC_R_LVDS_1, IMX_SC_PM_CLK_MISC3); imx_clk_scu2("mipi1_dsi_tx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_MST_BUS); imx_clk_scu2("mipi1_dsi_rx_esc_clk", mipi_sels, ARRAY_SIZE(mipi_sels), IMX_SC_R_MIPI_1, IMX_SC_PM_CLK_SLV_BUS); diff --git a/drivers/clk/mmp/clk-of-pxa168.c b/drivers/clk/mmp/clk-of-pxa168.c index 130d1a723..cb0ebbd82 100644 --- a/drivers/clk/mmp/clk-of-pxa168.c +++ b/drivers/clk/mmp/clk-of-pxa168.c @@ -306,18 +306,21 @@ static void __init pxa168_clk_init(struct device_node *np) pxa_unit->mpmu_base = of_iomap(np, 0); if (!pxa_unit->mpmu_base) { pr_err("failed to map mpmu registers\n"); + kfree(pxa_unit); return; } pxa_unit->apmu_base = of_iomap(np, 1); if (!pxa_unit->apmu_base) { pr_err("failed to map apmu registers\n"); + kfree(pxa_unit); return; } pxa_unit->apbc_base = of_iomap(np, 2); if (!pxa_unit->apbc_base) { pr_err("failed to map apbc registers\n"); + kfree(pxa_unit); return; } diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 422d78247..dcacc5064 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -21,24 +21,24 @@ #define MISC_CLK_ENB 0x48 #define OSC_CTRL 0x50 -#define OSC_CTRL_OSC_FREQ_MASK (3<<30) -#define OSC_CTRL_OSC_FREQ_13MHZ (0<<30) -#define OSC_CTRL_OSC_FREQ_19_2MHZ (1<<30) -#define OSC_CTRL_OSC_FREQ_12MHZ (2<<30) -#define OSC_CTRL_OSC_FREQ_26MHZ (3<<30) -#define OSC_CTRL_MASK (0x3f2 | OSC_CTRL_OSC_FREQ_MASK) - -#define OSC_CTRL_PLL_REF_DIV_MASK (3<<28) -#define OSC_CTRL_PLL_REF_DIV_1 (0<<28) -#define OSC_CTRL_PLL_REF_DIV_2 (1<<28) -#define OSC_CTRL_PLL_REF_DIV_4 (2<<28) +#define OSC_CTRL_OSC_FREQ_MASK (3u<<30) +#define OSC_CTRL_OSC_FREQ_13MHZ (0u<<30) +#define OSC_CTRL_OSC_FREQ_19_2MHZ (1u<<30) +#define OSC_CTRL_OSC_FREQ_12MHZ (2u<<30) +#define OSC_CTRL_OSC_FREQ_26MHZ (3u<<30) +#define OSC_CTRL_MASK (0x3f2u | OSC_CTRL_OSC_FREQ_MASK) + +#define OSC_CTRL_PLL_REF_DIV_MASK (3u<<28) +#define OSC_CTRL_PLL_REF_DIV_1 (0u<<28) +#define OSC_CTRL_PLL_REF_DIV_2 (1u<<28) +#define OSC_CTRL_PLL_REF_DIV_4 (2u<<28) #define OSC_FREQ_DET 0x58 -#define OSC_FREQ_DET_TRIG (1<<31) +#define OSC_FREQ_DET_TRIG (1u<<31) #define OSC_FREQ_DET_STATUS 0x5c -#define OSC_FREQ_DET_BUSY (1<<31) -#define OSC_FREQ_DET_CNT_MASK 0xFFFF +#define OSC_FREQ_DET_BUSYu (1<<31) +#define OSC_FREQ_DET_CNT_MASK 0xFFFFu #define TEGRA20_CLK_PERIPH_BANKS 3 diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index abdd26f7d..5771f3fc6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2952,6 +2952,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, if (min_pstate < cpu->min_perf_ratio) min_pstate = cpu->min_perf_ratio; + if (min_pstate > cpu->max_perf_ratio) + min_pstate = cpu->max_perf_ratio; + max_pstate = min(cap_pstate, cpu->max_perf_ratio); if (max_pstate < min_pstate) max_pstate = min_pstate; diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index b8e02c3a1..bbfb0f288 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -515,10 +515,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); static int __sev_platform_shutdown_locked(int *error) { - struct sev_device *sev = psp_master->sev_data; + struct psp_device *psp = psp_master; + struct sev_device *sev; int ret; - if (!sev || sev->state == SEV_STATE_UNINIT) + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c index 71e5f7943..6e4a78e1f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -419,8 +419,8 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, return 0; free_iq: - otx2_cpt_free_instruction_queues(lfs); cptlf_hw_cleanup(lfs); + otx2_cpt_free_instruction_queues(lfs); detach_rsrcs: otx2_cpt_detach_rsrcs_msg(lfs); clear_lfs_num: @@ -431,11 +431,13 @@ EXPORT_SYMBOL_NS_GPL(otx2_cptlf_init, CRYPTO_DEV_OCTEONTX2_CPT); void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) { - lfs->lfs_num = 0; /* Cleanup LFs hardware side */ cptlf_hw_cleanup(lfs); + /* Free instruction queues */ + otx2_cpt_free_instruction_queues(lfs); /* Send request to detach LFs */ otx2_cpt_detach_rsrcs_msg(lfs); + lfs->lfs_num = 0; } EXPORT_SYMBOL_NS_GPL(otx2_cptlf_shutdown, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 392e9fee0..6f3373f99 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -249,8 +249,11 @@ static void cptvf_lf_shutdown(struct otx2_cptlfs_info *lfs) otx2_cptlf_unregister_interrupts(lfs); /* Cleanup LFs software side */ lf_sw_cleanup(lfs); + /* Free instruction queues */ + otx2_cpt_free_instruction_queues(lfs); /* Send request to detach LFs */ otx2_cpt_detach_rsrcs_msg(lfs); + lfs->lfs_num = 0; } static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index 90a920e7f..c439be165 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -104,7 +104,7 @@ static struct stm32_crc *stm32_crc_get_next_crc(void) struct stm32_crc *crc; spin_lock_bh(&crc_list.lock); - crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list); + crc = list_first_entry_or_null(&crc_list.dev_list, struct stm32_crc, list); if (crc) list_move_tail(&crc->list, &crc_list.dev_list); spin_unlock_bh(&crc_list.lock); diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672..d2df97cfc 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 003a44132..5584af153 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -376,9 +376,9 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 2951a87cc..344e27616 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -482,6 +486,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -490,12 +498,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -512,6 +524,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 4cf8da77b..cac4532fe 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -56,6 +56,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -739,7 +741,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 8dd40d00a..6b829d347 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) if (!dpaa2_chan->fd_pool) goto err; - dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, - sizeof(struct dpaa2_fl_entry), - sizeof(struct dpaa2_fl_entry), 0); + dpaa2_chan->fl_pool = + dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry) * 3, + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) goto err_fd; dpaa2_chan->sdd_pool = dma_pool_create("sdd_pool", dev, - sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d) * 2, sizeof(struct dpaa2_qdma_sd_d), 0); if (!dpaa2_chan->sdd_pool) goto err_fl; diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 045ead46e..7082a5a68 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << @@ -514,11 +516,11 @@ static struct fsl_qdma_queue queue_temp = queue_head + i + (j * queue_num); queue_temp->cq = - dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - queue_size[i], - &queue_temp->bus_addr, - GFP_KERNEL); + dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); if (!queue_temp->cq) return NULL; queue_temp->block_base = fsl_qdma->block_base + @@ -563,11 +565,11 @@ static struct fsl_qdma_queue /* * Buffer for queue command */ - status_head->cq = dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - status_size, - &status_head->bus_addr, - GFP_KERNEL); + status_head->cq = dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); if (!status_head->cq) { devm_kfree(&pdev->dev, status_head); return NULL; @@ -805,7 +807,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); @@ -1201,10 +1203,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1243,16 +1241,19 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); + ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); + dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } - ret = fsl_qdma_reg_init(fsl_qdma); + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); if (ret) { - dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); return ret; } @@ -1272,8 +1273,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) static int fsl_qdma_remove(struct platform_device *pdev) { - int i; - struct fsl_qdma_queue *status; struct device_node *np = pdev->dev.of_node; struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); @@ -1282,11 +1281,6 @@ static int fsl_qdma_remove(struct platform_device *pdev) of_dma_controller_free(np); dma_async_device_unregister(&fsl_qdma->dma_dev); - for (i = 0; i < fsl_qdma->block_number; i++) { - status = fsl_qdma->status[i]; - dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * - status->n_cq, status->cq, status->bus_addr); - } return 0; } diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index e2070df6c..0b846c605 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -584,11 +584,11 @@ desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) } /** - * __cleanup - reclaim used descriptors + * __ioat_cleanup - reclaim used descriptors * @ioat_chan: channel (ring) to clean * @phys_complete: zeroed (or not) completion address (from status) */ -static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) +static void __ioat_cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; struct ioat_ring_ent *desc; @@ -675,7 +675,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan) spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); if (is_ioat_halted(*ioat_chan->completion)) { u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); @@ -712,7 +712,7 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) ioat_quiesce(ioat_chan, 0); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); __ioat_restart_chan(ioat_chan); } @@ -786,7 +786,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan) /* cleanup so tail points to descriptor that caused the error */ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); @@ -943,7 +943,7 @@ void ioat_timer_event(struct timer_list *t) /* timer restarted in ioat_cleanup_preamble * and IOAT_COMPLETION_ACK cleared */ - __cleanup(ioat_chan, phys_complete); + __ioat_cleanup(ioat_chan, phys_complete); goto unlock_out; } diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de..f79240734 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b3..f97d80343 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 7ec6e5d72..9212ac9f9 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2413,6 +2413,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2429,6 +2434,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b86b809eb..82e7acfda 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3963,6 +3963,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, { struct udma_chan *uc = to_udma_chan(&vc->chan); struct udma_desc *d; + u8 status; if (!vd) return; @@ -3972,12 +3973,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, if (d->metadata_size) udma_fetch_epib(uc, d); - /* Provide residue information for the client */ if (result) { void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); if (cppi5_desc_get_type(desc_vaddr) == CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + /* Provide residue information for the client */ result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr); if (result->residue) @@ -3986,7 +3987,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, result->result = DMA_TRANS_NOERROR; } else { result->residue = 0; - result->result = DMA_TRANS_NOERROR; + /* Propagate TR Response errors to the client */ + status = d->hwdesc[0].tr_resp_base->status; + if (status) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; } } } diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a..8aaa7fcb2 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 74bab0628..1879ec27c 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -100,10 +100,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c48c380d..1995f0a2e 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a..97bafb5f7 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2fd770b49..ff9791ce2 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -117,15 +117,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use * them as System RAM if they can be mapped writeback cacheable. @@ -169,6 +160,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index b7c0e8cc0..9077353d1 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -185,8 +185,27 @@ static const struct attribute_group efi_subsys_attr_group = { static struct efivars generic_efivars; static struct efivar_operations generic_ops; +static bool generic_ops_supported(void) +{ + unsigned long name_size; + efi_status_t status; + efi_char16_t name; + efi_guid_t guid; + + name_size = sizeof(name); + + status = efi.get_next_variable(&name_size, &name, &guid); + if (status == EFI_UNSUPPORTED) + return false; + + return true; +} + static int generic_ops_register(void) { + if (!generic_ops_supported()) + return 0; + generic_ops.get_variable = efi.get_variable; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; @@ -200,6 +219,9 @@ static int generic_ops_register(void) static void generic_ops_unregister(void) { + if (!generic_ops.get_variable) + return; + efivars_unregister(&generic_efivars); } diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef5045a53..473ef1842 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -25,7 +25,7 @@ cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) cflags-$(CONFIG_RISCV) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ - -fpic + -fpic -mno-relax cflags-$(CONFIG_LOONGARCH) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fpie @@ -84,6 +84,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o smbios.o lib-$(CONFIG_X86) += x86-stub.o +lib-$(CONFIG_X86_64) += x86-5lvl.o lib-$(CONFIG_RISCV) += riscv-stub.o lib-$(CONFIG_LOONGARCH) += loongarch-stub.o diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 1de9878dd..6b83c492c 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -22,12 +22,15 @@ * Return: status code */ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align) + unsigned long max, unsigned long align, + int memory_type) { efi_physical_addr_t alloc_addr; efi_status_t status; int slack; + max = min(max, EFI_ALLOC_LIMIT); + if (align < EFI_ALLOC_ALIGN) align = EFI_ALLOC_ALIGN; @@ -36,7 +39,7 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, slack = align / EFI_PAGE_SIZE - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, - EFI_LOADER_DATA, size / EFI_PAGE_SIZE + slack, + memory_type, size / EFI_PAGE_SIZE + slack, &alloc_addr); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e2f90566b..16f15e36f 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -180,7 +180,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * locate the kernel at a randomized offset in physical memory. */ status = efi_random_alloc(*reserve_size, min_kimg_align, - reserve_addr, phys_seed); + reserve_addr, phys_seed, + EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { @@ -190,10 +191,11 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (status != EFI_SUCCESS) { if (!check_image_region((u64)_text, kernel_memsize)) { efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); - } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { + } else if (IS_ALIGNED((u64)_text, min_kimg_align) && + (u64)_end < EFI_ALLOC_LIMIT) { /* * Just execute from wherever we were loaded by the - * UEFI PE/COFF loader if the alignment is suitable. + * UEFI PE/COFF loader if the placement is suitable. */ *image_addr = (u64)_text; *reserve_size = 0; @@ -201,7 +203,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align); + ULONG_MAX, min_kimg_align, + EFI_LOADER_CODE); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3d9b2469a..97744822d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -216,6 +216,8 @@ efi_status_t efi_parse_options(char const *cmdline) efi_loglevel = CONSOLE_LOGLEVEL_QUIET; } else if (!strcmp(param, "noinitrd")) { efi_noinitrd = true; + } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { + efi_no5lvl = true; } else if (!strcmp(param, "efi") && val) { efi_nochunk = parse_option_str(val, "nochunk"); efi_novamap |= parse_option_str(val, "novamap"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 970e86e3a..6741f3d90 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,6 +29,11 @@ #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#ifndef EFI_ALLOC_LIMIT +#define EFI_ALLOC_LIMIT ULONG_MAX +#endif + +extern bool efi_no5lvl; extern bool efi_nochunk; extern bool efi_nokaslr; extern int efi_loglevel; @@ -415,6 +420,26 @@ union efi_dxe_services_table { } mixed_mode; }; +typedef union efi_memory_attribute_protocol efi_memory_attribute_protocol_t; + +union efi_memory_attribute_protocol { + struct { + efi_status_t (__efiapi *get_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64 *); + + efi_status_t (__efiapi *set_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + + efi_status_t (__efiapi *clear_memory_attributes)( + efi_memory_attribute_protocol_t *, efi_physical_addr_t, u64, u64); + }; + struct { + u32 get_memory_attributes; + u32 set_memory_attributes; + u32 clear_memory_attributes; + } mixed_mode; +}; + typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t; union efi_uga_draw_protocol { @@ -880,7 +905,9 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long random_seed); + unsigned long *addr, unsigned long random_seed, + int memory_type, unsigned long alloc_min, + unsigned long alloc_max); efi_status_t efi_random_get_seed(void); @@ -907,7 +934,8 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, - unsigned long max, unsigned long align); + unsigned long max, unsigned long align, + int memory_type); efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 45841ef55..4f1fa3022 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -89,9 +89,12 @@ efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, efi_physical_addr_t alloc_addr; efi_status_t status; + max = min(max, EFI_ALLOC_LIMIT); + if (EFI_ALLOC_ALIGN > EFI_PAGE_SIZE) return efi_allocate_pages_aligned(size, addr, max, - EFI_ALLOC_ALIGN); + EFI_ALLOC_ALIGN, + EFI_LOADER_DATA); alloc_addr = ALIGN_DOWN(max + 1, EFI_ALLOC_ALIGN) - 1; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_MAX_ADDRESS, diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 9fb586989..7ba05719a 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -16,7 +16,8 @@ */ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, - unsigned long align_shift) + unsigned long align_shift, + u64 alloc_min, u64 alloc_max) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -29,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - (u64)ULONG_MAX); + alloc_max); if (region_end < size) return 0; - first_slot = round_up(md->phys_addr, align); + first_slot = round_up(max(md->phys_addr, alloc_min), align); last_slot = round_down(region_end - size + 1, align); if (first_slot > last_slot) @@ -53,7 +54,10 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, - unsigned long random_seed) + unsigned long random_seed, + int memory_type, + unsigned long alloc_min, + unsigned long alloc_max) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -75,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align)); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, + alloc_max); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) @@ -118,7 +123,7 @@ efi_status_t efi_random_alloc(unsigned long size, pages = size / EFI_PAGE_SIZE; status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, pages, &target); + memory_type, pages, &target); if (status == EFI_SUCCESS) *addr = target; break; diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c new file mode 100644 index 000000000..479dd445a --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-5lvl.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/efi.h> + +#include <asm/boot.h> +#include <asm/desc.h> +#include <asm/efi.h> + +#include "efistub.h" +#include "x86-stub.h" + +bool efi_no5lvl; + +static void (*la57_toggle)(void *cr3); + +static const struct desc_struct gdt[] = { + [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), +}; + +/* + * Enabling (or disabling) 5 level paging is tricky, because it can only be + * done from 32-bit mode with paging disabled. This means not only that the + * code itself must be running from 32-bit addressable physical memory, but + * also that the root page table must be 32-bit addressable, as programming + * a 64-bit value into CR3 when running in 32-bit mode is not supported. + */ +efi_status_t efi_setup_5level_paging(void) +{ + u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src; + efi_status_t status; + u8 *la57_code; + + if (!efi_is_64bit()) + return EFI_SUCCESS; + + /* check for 5 level paging support */ + if (native_cpuid_eax(0) < 7 || + !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return EFI_SUCCESS; + + /* allocate some 32-bit addressable memory for code and a page table */ + status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code, + U32_MAX); + if (status != EFI_SUCCESS) + return status; + + la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size); + memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); + + /* + * To avoid the need to allocate a 32-bit addressable stack, the + * trampoline uses a LJMP instruction to switch back to long mode. + * LJMP takes an absolute destination address, which needs to be + * fixed up at runtime. + */ + *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code; + + efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE); + + return EFI_SUCCESS; +} + +void efi_5level_switch(void) +{ + bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; + bool have_la57 = native_read_cr4() & X86_CR4_LA57; + bool need_toggle = want_la57 ^ have_la57; + u64 *pgt = (void *)la57_toggle + PAGE_SIZE; + u64 *cr3 = (u64 *)__native_read_cr3(); + u64 *new_cr3; + + if (!la57_toggle || !need_toggle) + return; + + if (!have_la57) { + /* + * 5 level paging will be enabled, so a root level page needs + * to be allocated from the 32-bit addressable physical region, + * with its first entry referring to the existing hierarchy. + */ + new_cr3 = memset(pgt, 0, PAGE_SIZE); + new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC; + } else { + /* take the new root table pointer from the current entry #0 */ + new_cr3 = (u64 *)(cr3[0] & PAGE_MASK); + + /* copy the new root table if it is not 32-bit addressable */ + if ((u64)new_cr3 > U32_MAX) + new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE); + } + + native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt }); + + la57_toggle(new_cr3); +} diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 4f0152b11..784e1b2ae 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -15,16 +15,16 @@ #include <asm/setup.h> #include <asm/desc.h> #include <asm/boot.h> +#include <asm/kaslr.h> +#include <asm/sev.h> #include "efistub.h" - -/* Maximum physical address for 64-bit kernel with 4-level paging */ -#define MAXMEM_X86_64_4LEVEL (1ull << 46) +#include "x86-stub.h" const efi_system_table_t *efi_system_table; const efi_dxe_services_table_t *efi_dxe_table; -extern u32 image_offset; static efi_loaded_image_t *image = NULL; +static efi_memory_attribute_protocol_t *memattr; static efi_status_t preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) @@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -static void -adjust_memory_range_protection(unsigned long start, unsigned long size) +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -221,12 +221,22 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) unsigned long rounded_start, rounded_end; unsigned long unprotect_start, unprotect_size; - if (efi_dxe_table == NULL) - return; - rounded_start = rounddown(start, EFI_PAGE_SIZE); rounded_end = roundup(start + size, EFI_PAGE_SIZE); + if (memattr != NULL) { + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_XP); + if (status != EFI_SUCCESS) + efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); + return status; + } + + if (efi_dxe_table == NULL) + return EFI_SUCCESS; + /* * Don't modify memory region attributes, they are * already suitable, to lower the possibility to @@ -238,7 +248,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) status = efi_dxe_call(get_memory_space_descriptor, start, &desc); if (status != EFI_SUCCESS) - return; + break; next = desc.base_address + desc.length; @@ -263,69 +273,26 @@ adjust_memory_range_protection(unsigned long start, unsigned long size) unprotect_start, unprotect_start + unprotect_size, status); + break; } } + return EFI_SUCCESS; } -/* - * Trampoline takes 2 pages and can be loaded in first megabyte of memory - * with its end placed between 128k and 640k where BIOS might start. - * (see arch/x86/boot/compressed/pgtable_64.c) - * - * We cannot find exact trampoline placement since memory map - * can be modified by UEFI, and it can alter the computed address. - */ - -#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024) -#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024) - -void startup_32(struct boot_params *boot_params); - -static void -setup_memory_protection(unsigned long image_base, unsigned long image_size) +static efi_char16_t *efistub_fw_vendor(void) { - /* - * Allow execution of possible trampoline used - * for switching between 4- and 5-level page tables - * and relocated kernel image. - */ - - adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, - TRAMPOLINE_PLACEMENT_SIZE); + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); -#ifdef CONFIG_64BIT - if (image_base != (unsigned long)startup_32) - adjust_memory_range_protection(image_base, image_size); -#else - /* - * Clear protection flags on a whole range of possible - * addresses used for KASLR. We don't need to do that - * on x86_64, since KASLR/extraction is performed after - * dedicated identity page tables are built and we only - * need to remove possible protection on relocated image - * itself disregarding further relocations. - */ - adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, - KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR); -#endif + return (efi_char16_t *)vendor; } static const efi_char16_t apple[] = L"Apple"; -static void setup_quirks(struct boot_params *boot_params, - unsigned long image_base, - unsigned long image_size) +static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } - - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) - setup_memory_protection(image_base, image_size); + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -478,7 +445,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, } image_base = efi_table_attr(image, image_base); - image_offset = (void *)startup_32 - image_base; status = efi_allocate_pages(sizeof(struct boot_params), (unsigned long *)&boot_params, ULONG_MAX); @@ -760,85 +726,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) return EFI_SUCCESS; } +static bool have_unsupported_snp_features(void) +{ + u64 unsupported; + + unsupported = snp_get_unsupported_features(sev_get_status()); + if (unsupported) { + efi_err("Unsupported SEV-SNP features detected: 0x%llx\n", + unsupported); + return true; + } + return false; +} + +static void efi_get_seed(void *seed, int size) +{ + efi_get_random_bytes(size, seed); + + /* + * This only updates seed[0] when running on 32-bit, but in that case, + * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit. + */ + *(unsigned long *)seed ^= kaslr_get_random_long("EFI"); +} + +static void error(char *str) +{ + efi_warn("Decompression failed: %s\n", str); +} + +static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) +{ + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + unsigned long addr, alloc_size, entry; + efi_status_t status; + u32 seed[2] = {}; + + /* determine the required size of the allocation */ + alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size), + MIN_KERNEL_ALIGN); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; + + efi_get_seed(seed, sizeof(seed)); + + virt_addr += (range * seed[1]) >> 32; + virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } + + boot_params_ptr->hdr.loadflags |= KASLR_FLAG; + } + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, + LOAD_PHYSICAL_ADDR, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; + + entry = decompress_kernel((void *)addr, virt_addr, error); + if (entry == ULONG_MAX) { + efi_free(alloc_size, addr); + return EFI_LOAD_ERROR; + } + + *kernel_entry = addr + entry; + + return efi_adjust_memory_range_protection(addr, kernel_total_size); +} + +static void __noreturn enter_kernel(unsigned long kernel_addr, + struct boot_params *boot_params) +{ + /* enter decompressed kernel with boot_params pointer in RSI/ESI */ + asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params)); + + unreachable(); +} + /* - * On success, we return the address of startup_32, which has potentially been - * relocated by efi_relocate_kernel. - * On failure, we exit to the firmware via efi_exit instead of returning. + * On success, this routine will jump to the relocated image directly and never + * return. On failure, it will exit to the firmware via efi_exit() instead of + * returning. */ -asmlinkage unsigned long efi_main(efi_handle_t handle, - efi_system_table_t *sys_table_arg, - struct boot_params *boot_params) +void __noreturn efi_stub_entry(efi_handle_t handle, + efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) { - unsigned long bzimage_addr = (unsigned long)startup_32; - unsigned long buffer_start, buffer_end; + efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID; struct setup_header *hdr = &boot_params->hdr; const struct linux_efi_initrd *initrd = NULL; + unsigned long kernel_entry; efi_status_t status; + boot_params_ptr = boot_params; + efi_system_table = sys_table_arg; /* Check if we were booted by the EFI firmware */ if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) efi_exit(handle, EFI_INVALID_PARAMETER); - efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); - if (efi_dxe_table && - efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { - efi_warn("Ignoring DXE services table: invalid signature\n"); - efi_dxe_table = NULL; + if (have_unsupported_snp_features()) + efi_exit(handle, EFI_UNSUPPORTED); + + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) { + efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID); + if (efi_dxe_table && + efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) { + efi_warn("Ignoring DXE services table: invalid signature\n"); + efi_dxe_table = NULL; + } } - /* - * If the kernel isn't already loaded at a suitable address, - * relocate it. - * - * It must be loaded above LOAD_PHYSICAL_ADDR. - * - * The maximum address for 64-bit is 1 << 46 for 4-level paging. This - * is defined as the macro MAXMEM, but unfortunately that is not a - * compile-time constant if 5-level paging is configured, so we instead - * define our own macro for use here. - * - * For 32-bit, the maximum address is complicated to figure out, for - * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what - * KASLR uses. - * - * Also relocate it if image_offset is zero, i.e. the kernel wasn't - * loaded by LoadImage, but rather by a bootloader that called the - * handover entry. The reason we must always relocate in this case is - * to handle the case of systemd-boot booting a unified kernel image, - * which is a PE executable that contains the bzImage and an initrd as - * COFF sections. The initrd section is placed after the bzImage - * without ensuring that there are at least init_size bytes available - * for the bzImage, and thus the compressed kernel's startup code may - * overwrite the initrd unless it is moved out of the way. - */ + /* grab the memory attributes protocol if it exists */ + efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr); - buffer_start = ALIGN(bzimage_addr - image_offset, - hdr->kernel_alignment); - buffer_end = buffer_start + hdr->init_size; - - if ((buffer_start < LOAD_PHYSICAL_ADDR) || - (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE) || - (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) || - (image_offset == 0)) { - extern char _bss[]; - - status = efi_relocate_kernel(&bzimage_addr, - (unsigned long)_bss - bzimage_addr, - hdr->init_size, - hdr->pref_address, - hdr->kernel_alignment, - LOAD_PHYSICAL_ADDR); - if (status != EFI_SUCCESS) { - efi_err("efi_relocate_kernel() failed!\n"); - goto fail; - } - /* - * Now that we've copied the kernel elsewhere, we no longer - * have a set up block before startup_32(), so reset image_offset - * to zero in case it was set earlier. - */ - image_offset = 0; + status = efi_setup_5level_paging(); + if (status != EFI_SUCCESS) { + efi_err("efi_setup_5level_paging() failed!\n"); + goto fail; } #ifdef CONFIG_CMDLINE_BOOL @@ -858,6 +878,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, } } + status = efi_decompress_kernel(&kernel_entry); + if (status != EFI_SUCCESS) { + efi_err("Failed to decompress kernel\n"); + goto fail; + } + /* * At this point, an initrd may already have been loaded by the * bootloader and passed via bootparams. We permit an initrd loaded @@ -897,7 +923,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, setup_efi_pci(boot_params); - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); + setup_quirks(boot_params); status = exit_boot(boot_params, handle); if (status != EFI_SUCCESS) { @@ -905,9 +931,38 @@ asmlinkage unsigned long efi_main(efi_handle_t handle, goto fail; } - return bzimage_addr; + /* + * Call the SEV init code while still running with the firmware's + * GDT/IDT, so #VC exceptions will be handled by EFI. + */ + sev_enable(boot_params); + + efi_5level_switch(); + + enter_kernel(kernel_entry, boot_params); fail: - efi_err("efi_main() failed!\n"); + efi_err("efi_stub_entry() failed!\n"); efi_exit(handle, status); } + +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params) +{ + extern char _bss[], _ebss[]; + + memset(_bss, 0, _ebss - _bss); + efi_stub_entry(handle, sys_table_arg, boot_params); +} + +#ifndef CONFIG_EFI_MIXED +extern __alias(efi_handover_entry) +void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); + +extern __alias(efi_handover_entry) +void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg, + struct boot_params *boot_params); +#endif +#endif diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h new file mode 100644 index 000000000..1c20e99a6 --- /dev/null +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/efi.h> + +extern void trampoline_32bit_src(void *, bool); +extern const u16 trampoline_ljmp_imm_offset; + +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); + +#ifdef CONFIG_X86_64 +efi_status_t efi_setup_5level_paging(void); +void efi_5level_switch(void); +#else +static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; } +static inline void efi_5level_switch(void) {} +#endif diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index d0daacd2c..6b142aa35 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 0ba9f1831..4ca256bcd 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -66,19 +66,28 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { + int rv; + if (down_interruptible(&efivars_lock)) return -EINTR; + if (__efivars) { + pr_warn("efivars already registered\n"); + rv = -EBUSY; + goto out; + } + efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; pr_info("Registered efivars operations\n"); - + rv = 0; +out: up(&efivars_lock); - return 0; + return rv; } EXPORT_SYMBOL_GPL(efivars_register); diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c33310..753e7be03 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 6d3e3454a..9d8c78312 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -784,11 +784,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -813,13 +813,13 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 2b97b8a96..fa6193535 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -333,6 +333,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -358,7 +359,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c46c6fbd2..e636c7850 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -999,6 +999,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 469785d33..1ef758ac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) return NULL; fence = container_of(f, struct amdgpu_amdkfd_fence, base); - if (fence && f->ops == &amdkfd_fence_ops) + if (f->ops == &amdkfd_fence_ops) return fence; return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a5352e5e2..6a4749c0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1310,6 +1310,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); + release_firmware(adev->pm.fw); if (fw_ver < 0x00160e00) return true; } @@ -4202,7 +4203,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b9983ca99..f24c3a20e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2414,6 +2414,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2428,6 +2429,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 23f0067f9..b803e785d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -585,8 +585,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2bc791ed8..ea0fb079f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -808,19 +808,26 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) * seconds, so here, we just pick up three parts for emulation. */ ret = memcmp(vram_ptr, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + (size / 2), cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + size - 10, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } +release_buffer: amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, &vram_ptr); - return 0; + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0ee7c935f..cde2fd2f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1222,19 +1222,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space - * @new_mem: new information of the bufer object * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1251,13 +1247,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); - - /* update statistics */ - if (!new_mem) - return; - - /* move_notify is called before move happens */ - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 6dcd7bab4..2ada421e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -312,9 +312,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 090e66a1b..54bdbd83a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, /* Never sync to VM updates either. */ if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) + owner != AMDGPU_FENCE_OWNER_UNDEFINED && + owner != AMDGPU_FENCE_OWNER_KFD) return false; /* Ignore fences depending on the sync mode */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 10469f20a..158b79188 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -555,10 +555,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } + trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict, new_mem); + amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1503,7 +1504,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false, NULL); + amdgpu_bo_move_notify(bo, false); } static struct ttm_device_funcs amdgpu_bo_driver = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 6e7058a2d..779707f19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1110,9 +1110,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, if (err) return -ENODEV; + err = amdgpu_ucode_validate(*fw); - if (err) + if (err) { dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + release_firmware(*fw); + *fw = NULL; + } return err; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index df385ffc9..6578ca1b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf..c19681492 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 84ca601f7..195b29892 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3064,6 +3064,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d96ee48e1..35921b41f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1144,6 +1144,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 7124347d2..310a5607d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -951,6 +951,11 @@ static int gmc_v11_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + gmc_v11_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ec291d28e..345f0c9f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -921,8 +921,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 979da6f51..12411f4c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1110,8 +1110,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 382dde1ce..cec9926e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1240,8 +1240,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0d9e9d9dd..3e631aefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1861,8 +1861,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** @@ -1900,6 +1900,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b..2c02ae698 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 7cd79a384..657e4ca6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -417,6 +417,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index eec13cb5b..84e8e8b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a5..cada9f300 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 811dd3ea6..489c89465 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1285,10 +1285,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b08905d1c..07a5d95be 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1e83db0c5..74c94df42 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093..f1ba76c35 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -424,6 +424,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 208812512..7fa5e70f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -380,14 +380,9 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); - if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { - /* We're not in the eviction worker. - * Signal the fence and synchronize with any - * pending eviction work. - */ + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) + /* We're not in the eviction worker. Signal the fence. */ dma_fence_signal(&svm_bo->eviction_fence->base); - cancel_work_sync(&svm_bo->eviction_work); - } dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -2246,8 +2241,10 @@ retry: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - /* Pairs with mmget in svm_range_add_list_work */ - mmput(mm); + /* Pairs with mmget in svm_range_add_list_work. If dropping the + * last mm refcount, schedule release work to avoid circular locking + */ + mmput_async(mm); spin_lock(&svms->deferred_list_lock); } @@ -2556,6 +2553,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, { struct vm_area_struct *vma; struct interval_tree_node *node; + struct rb_node *rb_node; unsigned long start_limit, end_limit; vma = find_vma(p->mm, addr << PAGE_SHIFT); @@ -2578,16 +2576,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, if (node) { end_limit = min(end_limit, node->start); /* Last range that ends before the fault address */ - node = container_of(rb_prev(&node->rb), - struct interval_tree_node, rb); + rb_node = rb_prev(&node->rb); } else { /* Last range must end before addr because * there was no range after addr */ - node = container_of(rb_last(&p->svms.objects.rb_root), - struct interval_tree_node, rb); + rb_node = rb_last(&p->svms.objects.rb_root); } - if (node) { + if (rb_node) { + node = container_of(rb_node, struct interval_tree_node, rb); if (node->last >= addr) { WARN(1, "Overlap with prev node and page fault addr\n"); return -EFAULT; @@ -3310,13 +3307,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) { - if (!fence) - return -EINVAL; - - if (dma_fence_is_signaled(&fence->base)) - return 0; - - if (fence->svm_bo) { + /* Dereferencing fence->svm_bo is safe here because the fence hasn't + * signaled yet and we're under the protection of the fence->lock. + * After the fence is signaled in svm_range_bo_release, we cannot get + * here any more. + * + * Reference is dropped in svm_range_evict_svm_bo_worker. + */ + if (svm_bo_ref_unless_zero(fence->svm_bo)) { WRITE_ONCE(fence->svm_bo->evicting, 1); schedule_work(&fence->svm_bo->eviction_work); } @@ -3331,8 +3329,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) int r = 0; svm_bo = container_of(work, struct svm_range_bo, eviction_work); - if (!svm_bo_ref_unless_zero(svm_bo)) - return; /* svm_bo was freed while eviction was pending */ if (mmget_not_zero(svm_bo->eviction_fence->mm)) { mm = svm_bo->eviction_fence->mm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 705d9e91b..029916971 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1513,17 +1513,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, /* CPU->CPU link*/ cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to); if (cpu_dev) { - list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) - if (iolink3->node_to == iolink2->node_to) - break; - - props->weight += iolink3->weight; - props->min_latency += iolink3->min_latency; - props->max_latency += iolink3->max_latency; - props->min_bandwidth = min(props->min_bandwidth, - iolink3->min_bandwidth); - props->max_bandwidth = min(props->max_bandwidth, - iolink3->max_bandwidth); + list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) { + if (iolink3->node_to != iolink2->node_to) + continue; + + props->weight += iolink3->weight; + props->min_latency += iolink3->min_latency; + props->max_latency += iolink3->max_latency; + props->min_bandwidth = min(props->min_bandwidth, + iolink3->min_bandwidth); + props->max_bandwidth = min(props->max_bandwidth, + iolink3->max_bandwidth); + break; + } } else { WARN(1, "CPU node not found"); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f02e509d5..a6c6f286a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2255,6 +2255,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } @@ -5937,6 +5938,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; + enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN; #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif @@ -6001,7 +6003,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); @@ -6068,7 +6072,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) stream->use_vsc_sdp_for_colorimetry = true; } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space); + if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) + tf = TRANSFER_FUNC_GAMMA_22; + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf); aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY; } @@ -10117,11 +10123,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7a309547c..f415733f1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1903,6 +1903,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c wait_for_no_pipes_pending(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); + /* Need to do otg sync again as otg could be out of sync due to otg + * workaround applied during clock update + */ + dc_trigger_sync(dc, context); } if (dc->hwss.update_dsc_pg) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index af110bf94..aefca9756 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -202,7 +202,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -231,7 +231,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 66923f510..e2f80cd0c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3038,6 +3038,12 @@ static void set_avi_info_frame( hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } + if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR && + stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) { + hdmi_info.bits.EC0_EC2 = 0; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; + } + /* TODO: un-hardcode aspect ratio */ aspect = stream->timing.aspect_ratio; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 46c2b991a..811c11766 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -244,7 +244,7 @@ enum pixel_format { #define DC_MAX_DIRTY_RECTS 3 struct dc_flip_addrs { struct dc_plane_address address; - unsigned int flip_timestamp_in_us; + unsigned long long flip_timestamp_in_us; bool flip_immediate; /* TODO: add flip duration for FreeSync */ bool triplebuffer_flips; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index f04595b75..5ec3f50a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1014,7 +1014,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id vpg = dcn301_vpg_create(ctx, vpg_inst); afmt = dcn301_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ca7d24000..6c7b286e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -51,8 +51,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) @@ -60,11 +64,11 @@ ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 19f556572..cc8c1a48c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -810,6 +810,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman (v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, + /* Output */ &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], @@ -3291,6 +3293,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightCThisState[k], v->TWait, (v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ? mode_lib->vba.ip.min_prefetch_in_strobe_us : 0, + mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false, /* Output */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 23e4be2ad..7f4fc49be 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3418,6 +3418,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, @@ -3887,12 +3888,32 @@ bool dml32_CalculatePrefetchSchedule( /* Clamp to oto for bandwidth calculation */ LinesForPrefetchBandwidth = dst_y_prefetch_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; - /* Clamp to equ for bandwidth calculation */ - LinesForPrefetchBandwidth = dst_y_prefetch_equ; + /* For mode programming we want to extend the prefetch as much as possible + * (up to oto, or as long as we can for equ) if we're not already applying + * the 60us prefetch requirement. This is to avoid intermittent underflow + * issues during prefetch. + * + * The prefetch extension is applied under the following scenarios: + * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) + * 2. We're using subvp or drr methods of p-state switch, in which case we + * we don't care if prefetch takes up more of the blanking time + * + * Mode programming typically chooses the smallest prefetch time possible + * (i.e. highest bandwidth during prefetch) presumably to create margin between + * p-states / c-states that happen in vblank and prefetch. Therefore we only + * apply this prefetch extension when p-state in vblank is not required (UCLK + * p-states take up the most vblank time). + */ + if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { + MyError = true; + } else { + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; + /* Clamp to equ for bandwidth calculation */ + LinesForPrefetchBandwidth = dst_y_prefetch_equ; + } } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 779c6805f..1823434d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -744,6 +744,7 @@ bool dml32_CalculatePrefetchSchedule( unsigned int SwathHeightC, double TWait, double TPreReq, + bool ExtendPrefetchIfPossible, /* Output */ double *DSTXAfterScaler, double *DSTYAfterScaler, diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 1d8b746b0..edf5845f6 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -35,7 +35,8 @@ struct mod_vrr_params; void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs); + enum dc_color_space cs, + enum color_transfer_func tf); void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index 27ceba9d6..69691058a 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -132,7 +132,8 @@ enum ColorimetryYCCDP { void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet, - enum dc_color_space cs) + enum dc_color_space cs, + enum color_transfer_func tf) { unsigned int vsc_packet_revision = vsc_packet_undefined; unsigned int i; @@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, colorimetryFormat = ColorimetryYCC_DP_AdobeYCC; else if (cs == COLOR_SPACE_2020_YCBCR) colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr; + + if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22) + colorimetryFormat = ColorimetryYCC_DP_ITU709; break; default: diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index e2f436dea..67287ad07 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -818,8 +818,6 @@ bool is_psr_su_specific_panel(struct dc_link *link) isPSRSUSupported = false; else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) isPSRSUSupported = false; - else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) - isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index dc0a6fba7..ff1032de4 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c index f2a55c141..17882f8df 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c @@ -200,7 +200,7 @@ static int get_platform_power_management_table( struct pp_hwmgr *hwmgr, ATOM_Tonga_PPM_Table *atom_ppm_table) { - struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL); + struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 5f8137e9c..77a304ac4 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1300,10 +1300,32 @@ static void anx7625_config(struct anx7625_data *ctx) XTAL_FRQ_SEL, XTAL_FRQ_27M); } +static int anx7625_hpd_timer_config(struct anx7625_data *ctx) +{ + int ret; + + /* Set irq detect window to 2ms */ + ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT8_15, + (HPD_TIME >> 8) & 0xFF); + ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, + HPD_DET_TIMER_BIT16_23, + (HPD_TIME >> 16) & 0xFF); + + return ret; +} + +static int anx7625_read_hpd_gpio_config_status(struct anx7625_data *ctx) +{ + return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, GPIO_CTRL_2); +} + static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) { struct device *dev = &ctx->client->dev; - int ret; + int ret, val; /* Reset main ocm */ ret = anx7625_reg_write(ctx, ctx->i2c.rx_p0_client, 0x88, 0x40); @@ -1317,6 +1339,19 @@ static void anx7625_disable_pd_protocol(struct anx7625_data *ctx) DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature fail.\n"); else DRM_DEV_DEBUG_DRIVER(dev, "disable PD feature succeeded.\n"); + + /* + * Make sure the HPD GPIO already be configured after OCM release before + * setting HPD detect window register. Here we poll the status register + * at maximum 40ms, then config HPD irq detect window register + */ + readx_poll_timeout(anx7625_read_hpd_gpio_config_status, + ctx, val, + ((val & HPD_SOURCE) || (val < 0)), + 2000, 2000 * 20); + + /* Set HPD irq detect window to 2ms */ + anx7625_hpd_timer_config(ctx); } static int anx7625_ocm_loading_check(struct anx7625_data *ctx) @@ -1440,20 +1475,6 @@ static void anx7625_start_dp_work(struct anx7625_data *ctx) static int anx7625_read_hpd_status_p0(struct anx7625_data *ctx) { - int ret; - - /* Set irq detect window to 2ms */ - ret = anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT0_7, HPD_TIME & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT8_15, - (HPD_TIME >> 8) & 0xFF); - ret |= anx7625_reg_write(ctx, ctx->i2c.tx_p2_client, - HPD_DET_TIMER_BIT16_23, - (HPD_TIME >> 16) & 0xFF); - if (ret < 0) - return ret; - return anx7625_reg_read(ctx, ctx->i2c.rx_p0_client, SYSTEM_STSTUS); } diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 239956199..164250c8c 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -259,6 +259,10 @@ #define AP_MIPI_RX_EN BIT(5) /* 1: MIPI RX input in 0: no RX in */ #define AP_DISABLE_PD BIT(6) #define AP_DISABLE_DISPLAY BIT(7) + +#define GPIO_CTRL_2 0x49 +#define HPD_SOURCE BIT(6) + /***************************************************************/ /* Register definition of device address 0x84 */ #define MIPI_PHY_CONTROL_3 0x03 diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 7098f125b..fd32041f8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index a8b4d918e..d6a057298 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -406,7 +406,7 @@ int drm_open(struct inode *inode, struct file *filp) { struct drm_device *dev; struct drm_minor *minor; - int retcode; + int retcode = 0; int need_setup = 0; minor = drm_minor_acquire(iminor(inode)); diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 2dd97473c..72ad1715f 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -570,7 +570,7 @@ int drm_mode_getfb2_ioctl(struct drm_device *dev, struct drm_mode_fb_cmd2 *r = data; struct drm_framebuffer *fb; unsigned int i; - int ret; + int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 853208e8d..ef7ec6886 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -346,7 +346,8 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); - mipi_dsi_detach(dsi); + if (dsi->attached) + mipi_dsi_detach(dsi); mipi_dsi_device_unregister(dsi); return 0; @@ -369,11 +370,18 @@ EXPORT_SYMBOL(mipi_dsi_host_unregister); int mipi_dsi_attach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + int ret; if (!ops || !ops->attach) return -ENOSYS; - return ops->attach(dsi->host, dsi); + ret = ops->attach(dsi->host, dsi); + if (ret) + return ret; + + dsi->attached = true; + + return 0; } EXPORT_SYMBOL(mipi_dsi_attach); @@ -385,9 +393,14 @@ int mipi_dsi_detach(struct mipi_dsi_device *dsi) { const struct mipi_dsi_host_ops *ops = dsi->host->ops; + if (WARN_ON(!dsi->attached)) + return -EINVAL; + if (!ops || !ops->detach) return -ENOSYS; + dsi->attached = false; + return ops->detach(dsi->host, dsi); } EXPORT_SYMBOL(mipi_dsi_detach); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index eb09e8604..68a6d4b0e 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -828,7 +828,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index da0145bc1..8f2737075 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -980,7 +980,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1049,7 +1050,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 16c539657..4095b0d3a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -309,6 +309,7 @@ err_mode_config_cleanup: drm_mode_config_cleanup(drm); exynos_drm_cleanup_dma(drm); kfree(private); + dev_set_drvdata(dev, NULL); err_free_drm: drm_dev_put(drm); @@ -323,6 +324,7 @@ static void exynos_drm_unbind(struct device *dev) exynos_drm_fbdev_fini(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); @@ -360,9 +362,18 @@ static int exynos_drm_platform_remove(struct platform_device *pdev) return 0; } +static void exynos_drm_platform_shutdown(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + + if (drm) + drm_atomic_helper_shutdown(drm); +} + static struct platform_driver exynos_drm_platform_driver = { .probe = exynos_drm_platform_probe, .remove = exynos_drm_platform_remove, + .shutdown = exynos_drm_platform_shutdown, .driver = { .name = "exynos-drm", .pm = &exynos_drm_pm_ops, diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index daac2050d..6f531bb61 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2844,8 +2844,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, for (i = start; i < end; i += 4) { p = intel_gvt_find_mmio_info(gvt, i); if (p) { - WARN(1, "dup mmio definition offset %x\n", - info->offset); + WARN(1, "dup mmio definition offset %x\n", i); /* We return -EEXIST here to make GVT-g load fail. * So duplicated MMIO can be found as soon as diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 119544d88..fbac39aa3 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -316,32 +316,34 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) goto exit_afbcd; if (has_components) { - ret = component_bind_all(drm->dev, drm); + ret = component_bind_all(dev, drm); if (ret) { dev_err(drm->dev, "Couldn't bind all components\n"); + /* Do not try to unbind */ + has_components = false; goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto unbind_all; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto unbind_all; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -359,15 +361,18 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); -unbind_all: - if (has_components) - component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); + meson_encoder_hdmi_remove(priv); + meson_encoder_cvbs_remove(priv); + + if (has_components) + component_unbind_all(dev, drm); + return ret; } diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211f..340745043 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index b14e6e507..03062e7a0 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -472,6 +472,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index b0eb881f8..25245ef38 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -38,6 +38,9 @@ #define DPU_ERROR_ENC(e, fmt, ...) DPU_ERROR("enc%d " fmt,\ (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) +#define DPU_ERROR_ENC_RATELIMITED(e, fmt, ...) DPU_ERROR_RATELIMITED("enc%d " fmt,\ + (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) + /* * Two to anticipate panels that can do cmd/vid dynamic switching * plan is to create all possible physical encoder types, and switch between @@ -2050,7 +2053,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } /* reset the merge 3D HW block */ - if (phys_enc->hw_pp->merge_3d) { + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, BLEND_3D_NONE); if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) @@ -2066,7 +2069,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) if (phys_enc->hw_wb) intf_cfg.wb = phys_enc->hw_wb->idx; - if (phys_enc->hw_pp->merge_3d) + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; if (ctl->ops.reset_intf_cfg) @@ -2385,7 +2388,7 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t) return; } - DPU_ERROR_ENC(dpu_enc, "frame done timeout\n"); + DPU_ERROR_ENC_RATELIMITED(dpu_enc, "frame done timeout\n"); event = DPU_ENCODER_FRAME_EVENT_ERROR; trace_dpu_enc_frame_done_timeout(DRMID(drm_enc), event); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c index a3e413d27..63dc2ee44 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c @@ -105,6 +105,9 @@ static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx, dst_format |= BIT(14); /* DST_ALPHA_X */ } + if (DPU_FORMAT_IS_YUV(fmt)) + dst_format |= BIT(15); + pattern = (fmt->element[3] << 24) | (fmt->element[2] << 16) | (fmt->element[1] << 8) | diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index ed80ed678..bb35aa5f5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -51,6 +51,7 @@ } while (0) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) +#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) /** * ktime_compare_safe - compare two ktime structures diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 103eef9f0..b20701893 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -133,11 +133,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) tbd = dp_link_get_test_bits_depth(ctrl->link, ctrl->panel->dp_mode.bpp); - if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { - pr_debug("BIT_DEPTH not set. Configure default\n"); - tbd = DP_TEST_BIT_DEPTH_8; - } - config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; /* Num of Lanes */ diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index cb66d1126..ceb382fa5 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -7,6 +7,7 @@ #include <drm/drm_print.h> +#include "dp_reg.h" #include "dp_link.h" #include "dp_panel.h" @@ -1075,7 +1076,7 @@ int dp_link_process_request(struct dp_link *dp_link) int dp_link_get_colorimetry_config(struct dp_link *dp_link) { - u32 cc; + u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; struct dp_link_private *link; if (!dp_link) { @@ -1089,10 +1090,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) * Unless a video pattern CTS test is ongoing, use RGB_VESA * Only RGB_VESA and RGB_CEA supported for now */ - if (dp_link_is_video_pattern_requested(link)) - cc = link->dp_link.test_video.test_dyn_range; - else - cc = DP_TEST_DYNAMIC_RANGE_VESA; + if (dp_link_is_video_pattern_requested(link)) { + if (link->dp_link.test_video.test_dyn_range & + DP_TEST_DYNAMIC_RANGE_CEA) + cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; + } return cc; } @@ -1172,6 +1174,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; + struct dp_link_private *link; + + link = container_of(dp_link, struct dp_link_private, dp_link); /* * Few simplistic rules and assumptions made here: @@ -1189,12 +1194,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) tbd = DP_TEST_BIT_DEPTH_10; break; default: - tbd = DP_TEST_BIT_DEPTH_UNKNOWN; + drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", + bpp); + tbd = DP_TEST_BIT_DEPTH_8; break; } - if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) - tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); return tbd; } diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index 268602803..176a503ec 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -129,6 +129,9 @@ #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) +#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) +#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) + #define REG_DP_VALID_BOUNDARY (0x00000030) #define REG_DP_VALID_BOUNDARY_2 (0x00000034) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 62bc3756f..c0bcf020e 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -673,6 +673,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(phy->ahb_clk), "Unable to get ahb clk\n"); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + /* PLL init will call into clk_register which requires * register access, so we need to enable power and ahb clock. */ diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d12ba47b3..0de361213 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -194,11 +196,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -206,7 +230,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -282,6 +306,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 31a5b81ee..be6674fb1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -997,7 +997,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c..8c2bf1c16 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index ecd623874..42584d8a9 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -203,6 +203,9 @@ struct edp_panel_entry { /** @name: Name of this panel (for printing to logs). */ const char *name; + + /** @override_edid_mode: Override the mode obtained by edid. */ + const struct drm_display_mode *override_edid_mode; }; struct panel_edp { @@ -301,6 +304,24 @@ static unsigned int panel_edp_get_display_modes(struct panel_edp *panel, return num; } +static int panel_edp_override_edid_mode(struct panel_edp *panel, + struct drm_connector *connector, + const struct drm_display_mode *override_mode) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, override_mode); + if (!mode) { + dev_err(panel->base.dev, "failed to add additional mode\n"); + return 0; + } + + mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + drm_mode_set_name(mode); + drm_mode_probed_add(connector, mode); + return 1; +} + static int panel_edp_get_non_edid_modes(struct panel_edp *panel, struct drm_connector *connector) { @@ -568,6 +589,9 @@ static int panel_edp_get_modes(struct drm_panel *panel, { struct panel_edp *p = to_panel_edp(panel); int num = 0; + bool has_override_edid_mode = p->detected_panel && + p->detected_panel != ERR_PTR(-EINVAL) && + p->detected_panel->override_edid_mode; /* probe EDID if a DDC bus is available */ if (p->ddc) { @@ -575,9 +599,18 @@ static int panel_edp_get_modes(struct drm_panel *panel, if (!p->edid) p->edid = drm_get_edid(connector, p->ddc); - - if (p->edid) - num += drm_add_edid_modes(connector, p->edid); + if (p->edid) { + if (has_override_edid_mode) { + /* + * override_edid_mode is specified. Use + * override_edid_mode instead of from edid. + */ + num += panel_edp_override_edid_mode(p, connector, + p->detected_panel->override_edid_mode); + } else { + num += drm_add_edid_modes(connector, p->edid); + } + } pm_runtime_mark_last_busy(panel->dev); pm_runtime_put_autosuspend(panel->dev); @@ -1861,6 +1894,15 @@ static const struct panel_delay delay_200_500_e200 = { .delay = _delay \ } +#define EDP_PANEL_ENTRY2(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name, _mode) \ +{ \ + .name = _name, \ + .panel_id = drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, \ + product_id), \ + .delay = _delay, \ + .override_edid_mode = _mode \ +} + /* * This table is used to figure out power sequencing delays for panels that * are detected by EDID. Entries here may point to entries in the diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5fc55b977..6806779f8 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1252,9 +1252,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = tegra_drm_fb_init(drm); if (err < 0) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 86affe987..393b97b4a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 0035affc3..9b2d23516 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -93,6 +93,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index b617aada5..7b097ab21 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -355,8 +355,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index 97a27a803..6feb812fc 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -80,6 +80,7 @@ static int i2c_hid_of_probe(struct i2c_client *client, if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index af163e8df..12d4c2874 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2080,7 +2080,7 @@ static int wacom_allocate_inputs(struct wacom *wacom) return 0; } -static int wacom_register_inputs(struct wacom *wacom) +static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); @@ -2099,10 +2099,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; - } else { - error = input_register_device(pen_input_dev); - if (error) - goto fail; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); @@ -2111,10 +2107,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; - } else { - error = input_register_device(touch_input_dev); - if (error) - goto fail; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); @@ -2123,7 +2115,34 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; - } else { + } + + return 0; +} + +static int wacom_register_inputs(struct wacom *wacom) +{ + struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; + struct wacom_wac *wacom_wac = &(wacom->wacom_wac); + int error = 0; + + pen_input_dev = wacom_wac->pen_input; + touch_input_dev = wacom_wac->touch_input; + pad_input_dev = wacom_wac->pad_input; + + if (pen_input_dev) { + error = input_register_device(pen_input_dev); + if (error) + goto fail; + } + + if (touch_input_dev) { + error = input_register_device(touch_input_dev); + if (error) + goto fail; + } + + if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; @@ -2379,6 +2398,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } + error = wacom_setup_inputs(wacom); + if (error) + goto fail; + + if (features->type == HID_GENERIC) + connect_mask |= HID_CONNECT_DRIVER; + + /* Regular HID work starts now */ + error = hid_hw_start(hdev, connect_mask); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail; + } + error = wacom_register_inputs(wacom); if (error) goto fail; @@ -2393,16 +2426,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } - if (features->type == HID_GENERIC) - connect_mask |= HID_CONNECT_DRIVER; - - /* Regular HID work starts now */ - error = hid_hw_start(hdev, connect_mask); - if (error) { - hid_err(hdev, "hw start failed\n"); - goto fail; - } - if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 165ed872f..53235b276 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2571,7 +2571,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev, wacom_wac->hid_data.tipswitch); input_report_key(input, wacom_wac->tool[0], sense); if (wacom_wac->serial[0]) { - input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); + /* + * xf86-input-wacom does not accept a serial number + * of '0'. Report the low 32 bits if possible, but + * if they are zero, report the upper ones instead. + */ + __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu; + __u32 serial_hi = wacom_wac->serial[0] >> 32; + input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi)); input_report_abs(input, ABS_MISC, sense ? id : 0); } diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index d11f674e3..51f321bcd 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -194,6 +194,8 @@ struct aspeed_pwm_tacho_data { u8 fan_tach_ch_source[16]; struct aspeed_cooling_device *cdev[8]; const struct attribute_group *groups[3]; + /* protects access to shared ASPEED_PTCR_RESULT */ + struct mutex tach_lock; }; enum type { TYPEM, TYPEN, TYPEO }; @@ -528,6 +530,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch_source, type, mode, both; int ret; + mutex_lock(&priv->tach_lock); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); @@ -545,6 +549,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, ASPEED_RPM_STATUS_SLEEP_USEC, usec); + mutex_unlock(&priv->tach_lock); + /* return -ETIMEDOUT if we didn't get an answer. */ if (ret) return ret; @@ -904,6 +910,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + mutex_init(&priv->tach_lock); priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, &aspeed_pwm_tacho_regmap_config); if (IS_ERR(priv->regmap)) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 09aab5859..c0aa6bfa6 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) @@ -380,7 +380,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) } static int create_core_attrs(struct temp_data *tdata, struct device *dev, - int attr_no) + int index) { int i; static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, @@ -392,13 +392,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, }; for (i = 0; i < tdata->attr_size; i++) { + /* + * We map the attr number to core id of the CPU + * The attr number is always core id + 2 + * The Pkgtemp will always show up as temp1_*, if available + */ + int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; + snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, "temp%d_%s", attr_no, suffixes[i]); sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; tdata->sd_attrs[i].dev_attr.attr.mode = 0444; tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; - tdata->sd_attrs[i].index = attr_no; + tdata->sd_attrs[i].index = index; tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; } tdata->attr_group.attrs = tdata->attrs; @@ -456,27 +463,22 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; - int err, index, attr_no; + int err, index; /* - * Find attr number for sysfs: - * We map the attr number to core id of the CPU - * The attr number is always core id + 2 - * The Pkgtemp will always show up as temp1_*, if available + * Get the index of tdata in pdata->core_data[] + * tdata for package: pdata->core_data[1] + * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1] */ if (pkg_flag) { - attr_no = PKG_SYSFS_ATTR_NO; + index = PKG_SYSFS_ATTR_NO; } else { - index = ida_alloc(&pdata->ida, GFP_KERNEL); + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; - pdata->cpu_map[index] = topology_core_id(cpu); - attr_no = index + BASE_SYSFS_ATTR_NO; - } - if (attr_no > MAX_CORE_DATA - 1) { - err = -ERANGE; - goto ida_free; + pdata->cpu_map[index] = topology_core_id(cpu); + index += BASE_SYSFS_ATTR_NO; } tdata = init_temp_data(cpu, pkg_flag); @@ -508,20 +510,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, } } - pdata->core_data[attr_no] = tdata; + pdata->core_data[index] = tdata; /* Create sysfs interfaces */ - err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no); + err = create_core_attrs(tdata, pdata->hwmon_dev, index); if (err) goto exit_free; return 0; exit_free: - pdata->core_data[attr_no] = NULL; + pdata->core_data[index] = NULL; kfree(tdata); ida_free: if (!pkg_flag) - ida_free(&pdata->ida, index); + ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO); return err; } diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 80310845f..9720ad214 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -2462,6 +2462,13 @@ store_pwm(struct device *dev, struct device_attribute *attr, const char *buf, int err; u16 reg; + /* + * The fan control mode should be set to manual if the user wants to adjust + * the fan speed. Otherwise, it will fail to set. + */ + if (index == 0 && data->pwm_enable[nr] > manual) + return -EBUSY; + err = kstrtoul(buf, 10, &val); if (err < 0) return err; diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index e73cdb1d2..784a80327 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -89,10 +89,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3159ffbb7..9a4e9bf30 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -500,11 +500,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EPROTO; data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fc70920c4..0c203c614 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -804,6 +804,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58..f297e4135 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -356,3 +356,8 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson <olof@lixom.net>"); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8fce98bb7..75b9c3f26 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -605,20 +605,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); + if (ret) + goto err; + if (msgs[i].flags & I2C_M_RD) { ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); if (ret) goto err; - } - - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); - if (ret) - goto err; - if (msgs[i].flags & I2C_M_RD) dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 4a49c75a9..b9cfda6ae 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -77,7 +77,8 @@ #define PRESCL_CTRL0 0x14 #define PRESCL_CTRL0_I2C(x) ((x) << 16) #define PRESCL_CTRL0_I3C(x) (x) -#define PRESCL_CTRL0_MAX GENMASK(9, 0) +#define PRESCL_CTRL0_I3C_MAX GENMASK(9, 0) +#define PRESCL_CTRL0_I2C_MAX GENMASK(15, 0) #define PRESCL_CTRL1 0x18 #define PRESCL_CTRL1_PP_LOW_MASK GENMASK(15, 8) @@ -1234,7 +1235,7 @@ static int cdns_i3c_master_bus_init(struct i3c_master_controller *m) return -EINVAL; pres = DIV_ROUND_UP(sysclk_rate, (bus->scl_rate.i3c * 4)) - 1; - if (pres > PRESCL_CTRL0_MAX) + if (pres > PRESCL_CTRL0_I3C_MAX) return -ERANGE; bus->scl_rate.i3c = sysclk_rate / ((pres + 1) * 4); @@ -1247,7 +1248,7 @@ static int cdns_i3c_master_bus_init(struct i3c_master_controller *m) max_i2cfreq = bus->scl_rate.i2c; pres = (sysclk_rate / (max_i2cfreq * 5)) - 1; - if (pres > PRESCL_CTRL0_MAX) + if (pres > PRESCL_CTRL0_I2C_MAX) return -ERANGE; bus->scl_rate.i2c = sysclk_rate / ((pres + 1) * 5); diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index ffac66db7..1f34747a6 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfb..c7f5866a1 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index c9614982c..a2f8278f0 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1601,10 +1601,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5a1a625d8..85097b769 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -228,6 +228,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 699382044..42b70cd42 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h index e9d282679..944d90712 100644 --- a/drivers/infiniband/core/cm_trace.h +++ b/drivers/infiniband/core/cm_trace.h @@ -16,7 +16,7 @@ #include <linux/tracepoint.h> #include <rdma/ib_cm.h> -#include <trace/events/rdma.h> +#include <trace/misc/rdma.h> /* * enum ib_cm_state, from include/rdma/ib_cm.h diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0773ca7ac..067d7f428 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3547,121 +3547,6 @@ err: return ret; } -static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - struct sockaddr_storage zero_sock = {}; - - if (src_addr && src_addr->sa_family) - return rdma_bind_addr(id, src_addr); - - /* - * When the src_addr is not specified, automatically supply an any addr - */ - zero_sock.ss_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = - (struct sockaddr_in6 *)&zero_sock; - struct sockaddr_in6 *dst_addr6 = - (struct sockaddr_in6 *)dst_addr; - - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = - dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *)&zero_sock)->sib_pkey = - ((struct sockaddr_ib *)dst_addr)->sib_pkey; - } - return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); -} - -/* - * If required, resolve the source address for bind and leave the id_priv in - * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior - * calls made by ULP, a previously bound ID will not be re-bound and src_addr is - * ignored. - */ -static int resolve_prepare_src(struct rdma_id_private *id_priv, - struct sockaddr *src_addr, - const struct sockaddr *dst_addr) -{ - int ret; - - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - /* For a well behaved ULP state will be RDMA_CM_IDLE */ - ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); - if (ret) - goto err_dst; - if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, - RDMA_CM_ADDR_QUERY))) { - ret = -EINVAL; - goto err_dst; - } - } - - if (cma_family(id_priv) != dst_addr->sa_family) { - ret = -EINVAL; - goto err_state; - } - return 0; - -err_state: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); -err_dst: - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return ret; -} - -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) -{ - struct rdma_id_private *id_priv = - container_of(id, struct rdma_id_private, id); - int ret; - - ret = resolve_prepare_src(id_priv, src_addr, dst_addr); - if (ret) - return ret; - - if (cma_any_addr(dst_addr)) { - ret = cma_resolve_loopback(id_priv); - } else { - if (dst_addr->sa_family == AF_IB) { - ret = cma_resolve_ib_addr(id_priv); - } else { - /* - * The FSM can return back to RDMA_CM_ADDR_BOUND after - * rdma_resolve_ip() is called, eg through the error - * path in addr_handler(). If this happens the existing - * request must be canceled before issuing a new one. - * Since canceling a request is a bit slow and this - * oddball path is rare, keep track once a request has - * been issued. The track turns out to be a permanent - * state since this is the only cancel as it is - * immediately before rdma_resolve_ip(). - */ - if (id_priv->used_resolve_ip) - rdma_addr_cancel(&id->route.addr.dev_addr); - else - id_priv->used_resolve_ip = 1; - ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, - &id->route.addr.dev_addr, - timeout_ms, addr_handler, - false, id_priv); - } - } - if (ret) - goto err; - - return 0; -err: - cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); - return ret; -} -EXPORT_SYMBOL(rdma_resolve_addr); - int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; @@ -4064,27 +3949,26 @@ err: } EXPORT_SYMBOL(rdma_listen); -int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +static int rdma_bind_addr_dst(struct rdma_id_private *id_priv, + struct sockaddr *addr, const struct sockaddr *daddr) { - struct rdma_id_private *id_priv; + struct sockaddr *id_daddr; int ret; - struct sockaddr *daddr; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; - id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; - ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); + ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = cma_translate_addr(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr); if (ret) goto err1; @@ -4104,8 +3988,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) } #endif } - daddr = cma_dst_addr(id_priv); - daddr->sa_family = addr->sa_family; + id_daddr = cma_dst_addr(id_priv); + if (daddr != id_daddr) + memcpy(id_daddr, daddr, rdma_addr_size(addr)); + id_daddr->sa_family = addr->sa_family; ret = cma_get_port(id_priv); if (ret) @@ -4121,6 +4007,129 @@ err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr_dst(id_priv, src_addr, dst_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; + } + return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr); +} + +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) +{ + int ret; + + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) + return -EINVAL; + + } else { + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + } + + if (cma_family(id_priv) != dst_addr->sa_family) { + ret = -EINVAL; + goto err_state; + } + return 0; + +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; + + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, + &id->route.addr.dev_addr, + timeout_ms, addr_handler, + false, id_priv); + } + } + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); + return ret; +} +EXPORT_SYMBOL(rdma_resolve_addr); + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv)); +} EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e45264267..47f3c6e4b 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -15,7 +15,7 @@ #define _TRACE_RDMA_CMA_H #include <linux/tracepoint.h> -#include <trace/events/rdma.h> +#include <trace/misc/rdma.h> DECLARE_EVENT_CLASS(cma_fsm_class, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d96c78e43..5c284dfbe 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -131,6 +131,11 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +struct ib_rmpp_mad_hdr { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; +} __packed; + #define CREATE_TRACE_POINTS #include <trace/events/ib_umad.h> @@ -494,11 +499,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; + struct ib_rmpp_mad_hdr *rmpp_mad_hdr; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct rdma_ah_attr ah_attr; struct ib_ah *ah; - struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; @@ -506,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; - packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); + packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; @@ -560,13 +565,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_up; } - rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data; + hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class); - if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & + rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; @@ -615,12 +620,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); - rmpp_mad->mad_hdr.tid = *tid; + rmpp_mad_hdr->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) - && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) - && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4ed8814ef..6ed056874 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1710,7 +1710,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1725,13 +1725,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51ae58c02..802b0e580 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2089,7 +2089,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,6 +2097,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 26c621627..969c5c3ab 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index ad54260cb..ebe98fa2b 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -345,6 +345,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 311a1138e..918a2d783 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -379,6 +379,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: @@ -562,6 +563,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 447e1bcc8..42c671f20 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -762,7 +762,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { @@ -2119,9 +2121,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); @@ -2825,7 +2826,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, iwmr->ibmr.pd = pd; iwmr->ibmr.device = pd->device; iwmr->ibmr.iova = virt; - iwmr->page_size = PAGE_SIZE; + iwmr->page_size = SZ_4K; if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, @@ -2845,6 +2846,13 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, switch (req.reg_type) { case IRDMA_MEMREG_TYPE_QP: + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) { + err = -EINVAL; + goto error; + } + total = req.sq_pages + req.rq_pages + shadow_pgcnt; if (total > iwmr->page_cnt) { err = -EINVAL; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc..61755b5f3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 5b3154503..319d4288e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -531,21 +531,18 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } - spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, - &rec, comp_mask, GFP_KERNEL, + &rec, comp_mask, GFP_ATOMIC, ipoib_mcast_join_complete, mcast); - spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); complete(&mcast->done); - spin_lock_irq(&priv->lock); + return ret; } return 0; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 25e799dba..cffa93f11 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); @@ -210,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1807,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e8011d70d..13c36f51b 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -489,6 +490,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ @@ -562,6 +564,9 @@ struct xboxone_init_packet { #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) +#define GIP_WIRED_INTF_DATA 0 +#define GIP_WIRED_INTF_AUDIO 1 + /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). @@ -2006,7 +2011,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id } if (xpad->xtype == XTYPE_XBOXONE && - intf->cur_altsetting->desc.bInterfaceNumber != 0) { + intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c4d8caade..661d6c8b0 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -792,7 +792,6 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; - bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -806,6 +805,11 @@ static int atkbd_probe(struct atkbd *atkbd) "keyboard reset failed on %s\n", ps2dev->serio->phys); + if (atkbd_skip_getid(atkbd)) { + atkbd->id = 0xab83; + return 0; + } + /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or @@ -814,18 +818,17 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - skip_getid = atkbd_skip_getid(atkbd); - if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* - * If the get ID command was skipped or failed, we check if we can at least set + * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = skip_getid ? 0xab83 : 0xabba; + atkbd->id = 0xabba; return 0; } diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index b585b1dab..dfc6c5818 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -635,6 +635,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, + { /* Gigabyte M912 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), @@ -1210,6 +1218,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, { .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), }, .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 3f0732db7..6de64b3f9 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1d9494f64..4526ff2e1 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -29,7 +29,6 @@ static LIST_HEAD(icc_providers); static int providers_count; static bool synced_state; static DEFINE_MUTEX(icc_lock); -static DEFINE_MUTEX(icc_bw_lock); static struct dentry *icc_debugfs_dir; static void icc_summary_show_one(struct seq_file *s, struct icc_node *n) @@ -636,7 +635,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) if (WARN_ON(IS_ERR(path) || !path->num_nodes)) return -EINVAL; - mutex_lock(&icc_bw_lock); + mutex_lock(&icc_lock); old_avg = path->reqs[0].avg_bw; old_peak = path->reqs[0].peak_bw; @@ -668,7 +667,7 @@ int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw) apply_constraints(path); } - mutex_unlock(&icc_bw_lock); + mutex_unlock(&icc_lock); trace_icc_set_bw_end(path, ret); @@ -971,7 +970,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); node->provider = provider; list_add_tail(&node->node_list, &provider->nodes); @@ -997,7 +995,6 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) node->avg_bw = 0; node->peak_bw = 0; - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_node_add); @@ -1137,7 +1134,6 @@ void icc_sync_state(struct device *dev) return; mutex_lock(&icc_lock); - mutex_lock(&icc_bw_lock); synced_state = true; list_for_each_entry(p, &icc_providers, provider_list) { dev_dbg(p->dev, "interconnect provider is in synced state\n"); @@ -1150,21 +1146,13 @@ void icc_sync_state(struct device *dev) } } } - mutex_unlock(&icc_bw_lock); mutex_unlock(&icc_lock); } EXPORT_SYMBOL_GPL(icc_sync_state); static int __init icc_init(void) { - struct device_node *root; - - /* Teach lockdep about lock ordering wrt. shrinker: */ - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&icc_bw_lock); - fs_reclaim_release(GFP_KERNEL); - - root = of_find_node_by_path("/"); + struct device_node *root = of_find_node_by_path("/"); providers_count = of_count_icc_providers(root); of_node_put(root); diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 83461e317..d9ee193fb 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1387,6 +1387,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8966f7d5a..82f100e59 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -152,6 +152,18 @@ static void queue_inc_cons(struct arm_smmu_ll_queue *q) q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); } +static void queue_sync_cons_ovf(struct arm_smmu_queue *q) +{ + struct arm_smmu_ll_queue *llq = &q->llq; + + if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons))) + return; + + llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | + Q_IDX(llq, llq->cons); + queue_sync_cons_out(q); +} + static int queue_sync_prod_in(struct arm_smmu_queue *q) { u32 prod; @@ -1583,8 +1595,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } @@ -1642,9 +1653,7 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) } while (!queue_empty(llq)); /* Sync our overflow flag, as we believe we're up to speed */ - llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | - Q_IDX(llq, llq->cons); - queue_sync_cons_out(q); + queue_sync_cons_ovf(q); return IRQ_HANDLED; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d80065c81..f15dcb9e4 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -267,13 +267,27 @@ static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) { - unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + unsigned int last_s2cr; u32 reg; u32 smr; int i; /* + * Some platforms support more than the Arm SMMU architected maximum of + * 128 stream matching groups. For unknown reasons, the additional + * groups don't exhibit the same behavior as the architected registers, + * so limit the groups to 128 until the behavior is fixed for the other + * groups. + */ + if (smmu->num_mapping_groups > 128) { + dev_notice(smmu->dev, "\tLimiting the stream matching groups to 128\n"); + smmu->num_mapping_groups = 128; + } + + last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1); + + /* * With some firmware versions writes to S2CR of type FAULT are * ignored, and writing BYPASS will end up written as FAULT in the * register. Perform a write to S2CR to detect if this is the case and diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 8261066de..e4358393f 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -152,13 +152,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) return &dom->domain; } -static void sprd_iommu_domain_free(struct iommu_domain *domain) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - - kfree(dom); -} - static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) { struct sprd_iommu_device *sdev = dom->sdev; @@ -231,6 +224,28 @@ static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); } +static void sprd_iommu_cleanup(struct sprd_iommu_domain *dom) +{ + size_t pgt_size; + + /* Nothing need to do if the domain hasn't been attached */ + if (!dom->sdev) + return; + + pgt_size = sprd_iommu_pgt_size(&dom->domain); + dma_free_coherent(dom->sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + dom->sdev = NULL; + sprd_iommu_hw_en(dom->sdev, false); +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + sprd_iommu_cleanup(dom); + kfree(dom); +} + static int sprd_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 091b0fe7e..5d4421f75 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -113,6 +113,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 895688150..4d03fb3a8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3161,6 +3161,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3196,7 +3197,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), @@ -3805,8 +3805,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3822,19 +3823,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); - if (from == cpu) - goto out; - - vpe->col_idx = cpu; + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + + if (from == cpu) goto out; + vpe->col_idx = cpu; + its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 3d99b8bdd..de115ee6e 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -242,7 +242,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 2f4784860..be5e19a86 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -144,7 +144,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP diff --git a/drivers/leds/trigger/ledtrig-panic.c b/drivers/leds/trigger/ledtrig-panic.c index 64abf2e91..5a6b21bfe 100644 --- a/drivers/leds/trigger/ledtrig-panic.c +++ b/drivers/leds/trigger/ledtrig-panic.c @@ -64,10 +64,13 @@ static long led_panic_blink(int state) static int __init ledtrig_panic_init(void) { + led_trigger_register_simple("panic", &trigger); + if (!trigger) + return -ENOMEM; + atomic_notifier_chain_register(&panic_notifier_list, &led_trigger_panic_nb); - led_trigger_register_simple("panic", &trigger); panic_blink = led_panic_blink; return 0; } diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c index c6d4957c4..0ec21dcdb 100644 --- a/drivers/mailbox/arm_mhuv2.c +++ b/drivers/mailbox/arm_mhuv2.c @@ -553,7 +553,8 @@ static irqreturn_t mhuv2_sender_interrupt(int irq, void *data) priv = chan->con_priv; if (!IS_PROTOCOL_DOORBELL(priv)) { - writel_relaxed(1, &mhu->send->ch_wn[priv->ch_wn_idx + priv->windows - 1].int_clr); + for (i = 0; i < priv->windows; i++) + writel_relaxed(1, &mhu->send->ch_wn[priv->ch_wn_idx + i].int_clr); if (chan->cl) { mbox_chan_txdone(chan, 0); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 71dcd8fd4..6314210d3 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -21,6 +21,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ff515437d..3e215aa85 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -61,6 +61,8 @@ struct convert_context { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -72,10 +74,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -83,6 +83,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1367,10 +1369,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1726,10 +1731,11 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1738,11 +1744,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - bio_endio(io->base_bio); -} +static void kcryptd_queue_read(struct dm_crypt_io *io); /* * One of the bios was finished. Check for completion of @@ -1757,6 +1759,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1767,20 +1778,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -1806,15 +1803,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1835,6 +1836,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2061,6 +2078,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); @@ -2097,6 +2120,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2126,11 +2157,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2173,10 +2210,13 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -2213,11 +2253,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2229,15 +2264,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); @@ -3112,7 +3142,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77fcff82c..3da4359f5 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -279,6 +279,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1699,6 +1701,77 @@ failed: get_random_bytes(result, ic->tag_size); } +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1784,15 +1857,8 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio, checksums); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4208,6 +4274,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4572,6 +4644,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 206e6ce55..437675481 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1877,7 +1877,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dac6a5f25..e0367a672 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -128,7 +128,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -143,7 +148,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 24df610a2..b48e1b59e 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -474,6 +474,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -500,9 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -553,10 +608,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -578,6 +630,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -634,23 +690,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -663,13 +702,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* @@ -950,6 +984,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1386,6 +1424,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c87..4620a98c9 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include <linux/dm-io.h> #include <linux/dm-bufio.h> #include <linux/device-mapper.h> #include <linux/interrupt.h> @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -83,7 +87,8 @@ struct dm_verity_io { struct bvec_iter iter; struct work_struct work; - struct tasklet_struct tasklet; + + char *recheck_buffer; /* * Three variably-size fields follow this struct: diff --git a/drivers/md/md.c b/drivers/md/md.c index 6120f26a7..846bdee4d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -564,8 +564,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) @@ -963,9 +967,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, return; bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev, - 1, - REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA, - GFP_NOIO, &mddev->sync_set); + 1, + REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META + | REQ_PREFLUSH | REQ_FUA, + GFP_NOIO, &mddev->sync_set); atomic_inc(&rdev->nr_pending); @@ -1145,6 +1150,7 @@ struct super_type { struct md_rdev *refdev, int minor_version); int (*validate_super)(struct mddev *mddev, + struct md_rdev *freshest, struct md_rdev *rdev); void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); @@ -1282,8 +1288,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor /* * validate_super for 0.90.0 + * note: we are not using "freshest" for 0.9 superblock */ -static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { mdp_disk_t *desc; mdp_super_t *sb = page_address(rdev->sb_page); @@ -1795,7 +1802,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return ret; } -static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) +static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); @@ -1891,13 +1898,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for - * spares (which don't need an event count) */ - ++ev1; + * spares (which don't need an event count). + * Similar to mdadm, we allow event counter difference of 1 + * from the freshest device. + */ if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) - if (ev1 < mddev->events) + if (ev1 + 1 < mddev->events) return -EINVAL; } else if (mddev->bitmap) { /* If adding to array with a bitmap, then we can accept an @@ -1918,8 +1927,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { role = MD_DISK_ROLE_SPARE; rdev->desc_nr = -1; - } else + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { + /* + * If we are assembling, and our event counter is smaller than the + * highest event counter, we cannot trust our superblock about the role. + * It could happen that our rdev was marked as Faulty, and all other + * superblocks were updated with +1 event counter. + * Then, before the next superblock update, which typically happens when + * remove_and_add_spares() removes the device from the array, there was + * a crash or reboot. + * If we allow current rdev without consulting the freshest superblock, + * we could cause data corruption. + * Note that in this case our event counter is smaller by 1 than the + * highest, otherwise, this rdev would not be allowed into array; + * both kernel and mdadm allow event counter difference of 1. + */ + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); + + if (rdev->desc_nr >= freshest_max_dev) { + /* this is unexpected, better not proceed */ + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", + mdname(mddev), rdev->bdev, rdev->desc_nr, + freshest->bdev, freshest_max_dev); + return -EUCLEAN; + } + + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", + mdname(mddev), rdev->bdev, role, role, freshest->bdev); + } else { role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + } switch(role) { case MD_DISK_ROLE_SPARE: /* spare */ break; @@ -2861,7 +2900,7 @@ static int add_bound_rdev(struct md_rdev *rdev) * and should be added immediately. */ super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if (add_journal) mddev_suspend(mddev); err = mddev->pers->hot_add_disk(mddev, rdev); @@ -3775,7 +3814,7 @@ static int analyze_sbs(struct mddev *mddev) } super_types[mddev->major_version]. - validate_super(mddev, freshest); + validate_super(mddev, NULL/*freshest*/, freshest); i = 0; rdev_for_each_safe(rdev, tmp, mddev) { @@ -3790,7 +3829,7 @@ static int analyze_sbs(struct mddev *mddev) } if (rdev != freshest) { if (super_types[mddev->major_version]. - validate_super(mddev, rdev)) { + validate_super(mddev, freshest, rdev)) { pr_warn("md: kicking non-fresh %pg from array!\n", rdev->bdev); md_kick_rdev_from_array(rdev); @@ -6804,7 +6843,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) rdev->saved_raid_disk = rdev->raid_disk; } else super_types[mddev->major_version]. - validate_super(mddev, rdev); + validate_super(mddev, NULL/*freshest*/, rdev); if ((info->state & (1<<MD_DISK_SYNC)) && rdev->raid_disk != info->raid_disk) { /* This was a hot-add request, but events doesn't diff --git a/drivers/media/i2c/imx335.c b/drivers/media/i2c/imx335.c index 078ede2b7..8e91767d6 100644 --- a/drivers/media/i2c/imx335.c +++ b/drivers/media/i2c/imx335.c @@ -971,8 +971,8 @@ static int imx335_init_controls(struct imx335 *imx335) imx335->hblank_ctrl = v4l2_ctrl_new_std(ctrl_hdlr, &imx335_ctrl_ops, V4L2_CID_HBLANK, - IMX335_REG_MIN, - IMX335_REG_MAX, + mode->hblank, + mode->hblank, 1, mode->hblank); if (imx335->hblank_ctrl) imx335->hblank_ctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git a/drivers/media/pci/ddbridge/ddbridge-main.c b/drivers/media/pci/ddbridge/ddbridge-main.c index 91733ab9f..363badab7 100644 --- a/drivers/media/pci/ddbridge/ddbridge-main.c +++ b/drivers/media/pci/ddbridge/ddbridge-main.c @@ -238,7 +238,7 @@ fail: ddb_unmap(dev); pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); - return -1; + return stat; } /****************************************************************************/ diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index deb2288d4..4f3d23b55 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -152,7 +152,6 @@ struct vpu_core { struct vpu_mbox tx_type; struct vpu_mbox tx_data; struct vpu_mbox rx; - unsigned long cmd_seq; wait_queue_head_t ack_wq; struct completion cmp; @@ -251,6 +250,8 @@ struct vpu_inst { struct list_head cmd_q; void *pending; + unsigned long cmd_seq; + atomic_long_t last_response_cmd; struct vpu_inst_ops *ops; const struct vpu_format *formats; diff --git a/drivers/media/platform/amphion/vpu_cmds.c b/drivers/media/platform/amphion/vpu_cmds.c index 235b71398..c68163af2 100644 --- a/drivers/media/platform/amphion/vpu_cmds.c +++ b/drivers/media/platform/amphion/vpu_cmds.c @@ -34,6 +34,7 @@ struct vpu_cmd_t { struct vpu_cmd_request *request; struct vpu_rpc_event *pkt; unsigned long key; + atomic_long_t *last_response_cmd; }; static struct vpu_cmd_request vpu_cmd_requests[] = { @@ -117,6 +118,8 @@ static void vpu_free_cmd(struct vpu_cmd_t *cmd) { if (!cmd) return; + if (cmd->last_response_cmd) + atomic_long_set(cmd->last_response_cmd, cmd->key); vfree(cmd->pkt); vfree(cmd); } @@ -174,7 +177,8 @@ static int vpu_request_cmd(struct vpu_inst *inst, u32 id, void *data, return -ENOMEM; mutex_lock(&core->cmd_lock); - cmd->key = core->cmd_seq++; + cmd->key = ++inst->cmd_seq; + cmd->last_response_cmd = &inst->last_response_cmd; if (key) *key = cmd->key; if (sync) @@ -248,26 +252,12 @@ void vpu_clear_request(struct vpu_inst *inst) static bool check_is_responsed(struct vpu_inst *inst, unsigned long key) { - struct vpu_core *core = inst->core; - struct vpu_cmd_t *cmd; - bool flag = true; + unsigned long last_response = atomic_long_read(&inst->last_response_cmd); - mutex_lock(&core->cmd_lock); - cmd = inst->pending; - if (cmd && key == cmd->key) { - flag = false; - goto exit; - } - list_for_each_entry(cmd, &inst->cmd_q, list) { - if (key == cmd->key) { - flag = false; - break; - } - } -exit: - mutex_unlock(&core->cmd_lock); + if (key <= last_response && (last_response - key) < (ULONG_MAX >> 1)) + return true; - return flag; + return false; } static int sync_session_response(struct vpu_inst *inst, unsigned long key, long timeout, int try) diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index e5c8e1a75..e7a18948c 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -631,6 +631,7 @@ int vpu_v4l2_open(struct file *file, struct vpu_inst *inst) func = &vpu->decoder; atomic_set(&inst->ref_count, 0); + atomic_long_set(&inst->last_response_cmd, 0); vpu_inst_get(inst); inst->vpu = vpu; inst->core = vpu_request_core(vpu, inst->type); diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 61b25fcf8..9b20cef5a 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -187,7 +187,7 @@ static int rga_setup_ctrls(struct rga_ctx *ctx) static struct rga_fmt formats[] = { { .fourcc = V4L2_PIX_FMT_ARGB32, - .color_swap = RGA_COLOR_RB_SWAP, + .color_swap = RGA_COLOR_ALPHA_SWAP, .hw_format = RGA_COLOR_FMT_ABGR8888, .depth = 32, .uv_factor = 1, @@ -195,17 +195,8 @@ static struct rga_fmt formats[] = { .x_div = 1, }, { - .fourcc = V4L2_PIX_FMT_XRGB32, - .color_swap = RGA_COLOR_RB_SWAP, - .hw_format = RGA_COLOR_FMT_XBGR8888, - .depth = 32, - .uv_factor = 1, - .y_div = 1, - .x_div = 1, - }, - { .fourcc = V4L2_PIX_FMT_ABGR32, - .color_swap = RGA_COLOR_ALPHA_SWAP, + .color_swap = RGA_COLOR_RB_SWAP, .hw_format = RGA_COLOR_FMT_ABGR8888, .depth = 32, .uv_factor = 1, @@ -214,7 +205,7 @@ static struct rga_fmt formats[] = { }, { .fourcc = V4L2_PIX_FMT_XBGR32, - .color_swap = RGA_COLOR_ALPHA_SWAP, + .color_swap = RGA_COLOR_RB_SWAP, .hw_format = RGA_COLOR_FMT_XBGR8888, .depth = 32, .uv_factor = 1, diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index a1293c45a..f9ec1c613 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -61,6 +61,14 @@ struct dentry; RKISP1_CIF_ISP_EXP_END | \ RKISP1_CIF_ISP_HIST_MEASURE_RDY) +/* IRQ lines */ +enum rkisp1_irq_line { + RKISP1_IRQ_ISP = 0, + RKISP1_IRQ_MI, + RKISP1_IRQ_MIPI, + RKISP1_NUM_IRQS, +}; + /* enum for the resizer pads */ enum rkisp1_rsz_pad { RKISP1_RSZ_PAD_SINK, @@ -441,7 +449,6 @@ struct rkisp1_debug { * struct rkisp1_device - ISP platform device * * @base_addr: base register address - * @irq: the irq number * @dev: a pointer to the struct device * @clk_size: number of clocks * @clks: array of clocks @@ -459,6 +466,7 @@ struct rkisp1_debug { * @stream_lock: serializes {start/stop}_streaming callbacks between the capture devices. * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information + * @irqs: IRQ line numbers */ struct rkisp1_device { void __iomem *base_addr; @@ -479,6 +487,7 @@ struct rkisp1_device { struct mutex stream_lock; /* serialize {start/stop}_streaming cb between capture devices */ struct rkisp1_debug debug; const struct rkisp1_info *info; + int irqs[RKISP1_NUM_IRQS]; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index d7acc94e1..e862f515c 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -141,8 +141,20 @@ static void rkisp1_csi_disable(struct rkisp1_csi *csi) struct rkisp1_device *rkisp1 = csi->rkisp1; u32 val; - /* Mask and clear interrupts. */ + /* Mask MIPI interrupts. */ rkisp1_write(rkisp1, RKISP1_CIF_MIPI_IMSC, 0); + + /* Flush posted writes */ + rkisp1_read(rkisp1, RKISP1_CIF_MIPI_IMSC); + + /* + * Wait until the IRQ handler has ended. The IRQ handler may get called + * even after this, but it will return immediately as the MIPI + * interrupts have been masked. + */ + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_MIPI]); + + /* Clear MIPI interrupt status */ rkisp1_write(rkisp1, RKISP1_CIF_MIPI_ICR, ~0); val = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_CTRL); diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 2b76339f9..41abb18b0 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -114,6 +114,7 @@ struct rkisp1_isr_data { const char *name; irqreturn_t (*isr)(int irq, void *ctx); + u32 line_mask; }; /* ---------------------------------------------------------------------------- @@ -442,17 +443,25 @@ error: static irqreturn_t rkisp1_isr(int irq, void *ctx) { + irqreturn_t ret = IRQ_NONE; + /* * Call rkisp1_capture_isr() first to handle the frame that * potentially completed using the current frame_sequence number before * it is potentially incremented by rkisp1_isp_isr() in the vertical * sync. */ - rkisp1_capture_isr(irq, ctx); - rkisp1_isp_isr(irq, ctx); - rkisp1_csi_isr(irq, ctx); - return IRQ_HANDLED; + if (rkisp1_capture_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + if (rkisp1_isp_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + if (rkisp1_csi_isr(irq, ctx) == IRQ_HANDLED) + ret = IRQ_HANDLED; + + return ret; } static const char * const px30_isp_clks[] = { @@ -463,9 +472,9 @@ static const char * const px30_isp_clks[] = { }; static const struct rkisp1_isr_data px30_isp_isrs[] = { - { "isp", rkisp1_isp_isr }, - { "mi", rkisp1_capture_isr }, - { "mipi", rkisp1_csi_isr }, + { "isp", rkisp1_isp_isr, BIT(RKISP1_IRQ_ISP) }, + { "mi", rkisp1_capture_isr, BIT(RKISP1_IRQ_MI) }, + { "mipi", rkisp1_csi_isr, BIT(RKISP1_IRQ_MIPI) }, }; static const struct rkisp1_info px30_isp_info = { @@ -484,7 +493,7 @@ static const char * const rk3399_isp_clks[] = { }; static const struct rkisp1_isr_data rk3399_isp_isrs[] = { - { NULL, rkisp1_isr }, + { NULL, rkisp1_isr, BIT(RKISP1_IRQ_ISP) | BIT(RKISP1_IRQ_MI) | BIT(RKISP1_IRQ_MIPI) }, }; static const struct rkisp1_info rk3399_isp_info = { @@ -535,6 +544,9 @@ static int rkisp1_probe(struct platform_device *pdev) if (IS_ERR(rkisp1->base_addr)) return PTR_ERR(rkisp1->base_addr); + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) + rkisp1->irqs[il] = -1; + for (i = 0; i < info->isr_size; i++) { irq = info->isrs[i].name ? platform_get_irq_byname(pdev, info->isrs[i].name) @@ -542,6 +554,11 @@ static int rkisp1_probe(struct platform_device *pdev) if (irq < 0) return irq; + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (info->isrs[i].line_mask & BIT(il)) + rkisp1->irqs[il] = irq; + } + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index 585cf3f53..00dca284c 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -281,11 +281,25 @@ static void rkisp1_isp_stop(struct rkisp1_isp *isp) * ISP(mi) stop in mi frame end -> Stop ISP(mipi) -> * Stop ISP(isp) ->wait for ISP isp off */ - /* stop and clear MI and ISP interrupts */ - rkisp1_write(rkisp1, RKISP1_CIF_ISP_IMSC, 0); - rkisp1_write(rkisp1, RKISP1_CIF_ISP_ICR, ~0); + /* Mask MI and ISP interrupts */ + rkisp1_write(rkisp1, RKISP1_CIF_ISP_IMSC, 0); rkisp1_write(rkisp1, RKISP1_CIF_MI_IMSC, 0); + + /* Flush posted writes */ + rkisp1_read(rkisp1, RKISP1_CIF_MI_IMSC); + + /* + * Wait until the IRQ handler has ended. The IRQ handler may get called + * even after this, but it will return immediately as the MI and ISP + * interrupts have been masked. + */ + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_ISP]); + if (rkisp1->irqs[RKISP1_IRQ_ISP] != rkisp1->irqs[RKISP1_IRQ_MI]) + synchronize_irq(rkisp1->irqs[RKISP1_IRQ_MI]); + + /* Clear MI and ISP interrupt status */ + rkisp1_write(rkisp1, RKISP1_CIF_ISP_ICR, ~0); rkisp1_write(rkisp1, RKISP1_CIF_MI_ICR, ~0); /* stop ISP */ diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98..52d82cbe7 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 196806709..69e630d85 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 184e0b357..adb8c794a 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1a..7df949fc6 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} diff --git a/drivers/media/usb/stk1160/stk1160-video.c b/drivers/media/usb/stk1160/stk1160-video.c index 4e966f6bf..366f0e4a5 100644 --- a/drivers/media/usb/stk1160/stk1160-video.c +++ b/drivers/media/usb/stk1160/stk1160-video.c @@ -107,8 +107,7 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) /* * TODO: These stk1160_dbg are very spammy! - * We should 1) check why we are getting them - * and 2) add ratelimit. + * We should check why we are getting them. * * UPDATE: One of the reasons (the only one?) for getting these * is incorrect standard (mismatch between expected and configured). @@ -151,7 +150,7 @@ void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len) /* Let the bug hunt begin! sanity checks! */ if (lencopy < 0) { - stk1160_dbg("copy skipped: negative lencopy\n"); + printk_ratelimited(KERN_DEBUG "copy skipped: negative lencopy\n"); return; } diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9da8235cb..43cb511fd 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1440,6 +1440,7 @@ config MFD_DAVINCI_VOICECODEC config MFD_TI_AM335X_TSCADC tristate "TI ADC / Touch Screen chip support" + depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST select MFD_CORE select REGMAP select REGMAP_MMIO diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index cc57cc820..69cc24962 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1990,7 +1990,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index d7daa01fe..fdec2c30e 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -151,6 +151,7 @@ static int lis3lv02d_i2c_probe(struct i2c_client *client, lis3_dev.init = lis3_i2c_init; lis3_dev.read = lis3_i2c_read; lis3_dev.write = lis3_i2c_write; + lis3_dev.reg_ctrl = lis3_reg_ctrl; lis3_dev.irq = client->irq; lis3_dev.ac = lis3lv02d_axis_map; lis3_dev.pm_dev = &client->dev; diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index c61be3404..504b836a7 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -142,7 +142,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -152,6 +151,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a46ce0868..3a927452a 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1007,10 +1007,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index dd2a4b6ab..e3c69c6b8 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -62,11 +62,15 @@ int mmc_gpio_alloc(struct mmc_host *host) int mmc_gpio_get_ro(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; + int cansleep; if (!ctx || !ctx->ro_gpio) return -ENOSYS; - return gpiod_get_value_cansleep(ctx->ro_gpio); + cansleep = gpiod_cansleep(ctx->ro_gpio); + return cansleep ? + gpiod_get_value_cansleep(ctx->ro_gpio) : + gpiod_get_value(ctx->ro_gpio); } EXPORT_SYMBOL(mmc_gpio_get_ro); diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 60bca78a7..0511583ff 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -567,6 +590,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index bca1d095b..24bb0e980 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -602,6 +602,35 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_o2_enable_clk(host, clk); } +static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot = sdhci_priv(host); + u32 scratch_32 = 0; + u8 scratch_8 = 0; + + chip = slot->chip; + + if (mode == MMC_POWER_OFF) { + /* UnLock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 &= 0x7f; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + + /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ + pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); + scratch_32 &= ~(O2_SD_SEL_DLL); + pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); + + /* Lock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 |= 0x80; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + } + + sdhci_set_power(host, mode, vdd); +} + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) { struct sdhci_pci_chip *chip; @@ -911,6 +940,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_pci_o2_set_power, }; const struct sdhci_pci_fixes sdhci_o2 = { diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375d..cc9d28b75 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/ktime.h> +#include <linux/iopoll.h> #include <linux/of_address.h> #include "sdhci-pltfm.h" @@ -109,6 +110,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -216,6 +219,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -232,6 +248,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); @@ -259,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index 6b043e248..9116ee7f0 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -186,7 +186,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -207,6 +207,7 @@ static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, * report the maximum of 4 in this case */ /* bits sorted this way (3...0): ECCS1,ECCS0,ECCSE1,ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status & STATUS_ECC_MASK) >> 2) | ((status2 & STATUS_ECC_MASK) >> 4); @@ -228,7 +229,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, { u8 status2; struct spi_mem_op op = SPINAND_GET_FEATURE_OP(GD5FXGQXXEXXG_REG_STATUS2, - &status2); + spinand->scratchbuf); int ret; switch (status & STATUS_ECC_MASK) { @@ -248,6 +249,7 @@ static int gd5fxgq5xexxg_ecc_get_status(struct spinand_device *spinand, * 1 ... 4 bits are flipped (and corrected) */ /* bits sorted this way (1...0): ECCSE1, ECCSE0 */ + status2 = *(spinand->scratchbuf); return ((status2 & STATUS_ECC_MASK) >> 4) + 1; case STATUS_ECC_UNCOR_ERROR: diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c index 192190c42..e7b8e9d0a 100644 --- a/drivers/mtd/parsers/ofpart_core.c +++ b/drivers/mtd/parsers/ofpart_core.c @@ -122,6 +122,25 @@ static int parse_fixed_partitions(struct mtd_info *master, a_cells = of_n_addr_cells(pp); s_cells = of_n_size_cells(pp); + if (!dedicated && s_cells == 0) { + /* + * This is a ugly workaround to not create + * regression on devices that are still creating + * partitions as direct children of the nand controller. + * This can happen in case the nand controller node has + * #size-cells equal to 0 and the firmware (e.g. + * U-Boot) just add the partitions there assuming + * 32-bit addressing. + * + * If you get this warning your firmware and/or DTS + * should be really fixed. + * + * This is working only for devices smaller than 4GiB. + */ + pr_warn("%s: ofpart partition %pOF (%pOF) #size-cells is wrongly set to <0>, assuming <1> for parsing partitions.\n", + master->name, pp, mtd_node); + s_cells = 1; + } if (len / 4 != a_cells + s_cells) { pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n", master->name, pp, diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index fc5da5d77..9c4c2c7d9 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -985,7 +985,8 @@ static int alb_upper_dev_walk(struct net_device *upper, if (netif_is_macvlan(upper) && !strict_match) { tags = bond_verify_device_path(bond->dev, upper, 0); if (IS_ERR_OR_NULL(tags)) - BUG(); + return -ENOMEM; + alb_send_lp_vid(slave, upper->dev_addr, tags[0].vlan_proto, tags[0].vlan_id); kfree(tags); diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 8efa22d9f..053d375ea 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -311,7 +311,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ - can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, + can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c63e082dc..934600ecc 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index e693154cf..97a47d874 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -601,8 +601,8 @@ struct mv88e6xxx_ops { int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port); int (*serdes_get_strings)(struct mv88e6xxx_chip *chip, int port, uint8_t *data); - int (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); + size_t (*serdes_get_stats)(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); /* SERDES registers for ethtool */ int (*serdes_get_regs_len)(struct mv88e6xxx_chip *chip, int port); diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index d94150d8f..4c38df498 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -342,8 +342,8 @@ static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip, return val; } -int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data) +size_t mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data) { struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port]; struct mv88e6352_serdes_hw_stat *stat; @@ -352,7 +352,7 @@ int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, err = mv88e6352_g2_scratch_port_has_serdes(chip, port); if (err <= 0) - return err; + return 0; BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) > ARRAY_SIZE(mv88e6xxx_port->serdes_stats)); @@ -798,8 +798,8 @@ static uint64_t mv88e6390_serdes_get_stat(struct mv88e6xxx_chip *chip, int lane, return reg[0] | ((u64)reg[1] << 16) | ((u64)reg[2] << 32); } -int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data) +size_t mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data) { struct mv88e6390_serdes_hw_stat *stat; int lane; diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h index 29bb4e91e..673690549 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.h +++ b/drivers/net/dsa/mv88e6xxx/serdes.h @@ -165,13 +165,13 @@ irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port); int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip, int port, uint8_t *data); -int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); +size_t mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port); int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip, int port, uint8_t *data); -int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); +size_t mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port, + uint64_t *data); int mv88e6352_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port); void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p); diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 1e94ba103..641692f71 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -842,10 +842,15 @@ qca8k_mdio_register(struct qca8k_priv *priv) struct dsa_switch *ds = priv->ds; struct device_node *mdio; struct mii_bus *bus; + int err; + + mdio = of_get_child_by_name(priv->dev->of_node, "mdio"); bus = devm_mdiobus_alloc(ds->dev); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto out_put_node; + } bus->priv = (void *)priv; snprintf(bus->id, MII_BUS_ID_SIZE, "qca8k-%d.%d", @@ -855,12 +860,12 @@ qca8k_mdio_register(struct qca8k_priv *priv) ds->slave_mii_bus = bus; /* Check if the devicetree declare the port:phy mapping */ - mdio = of_get_child_by_name(priv->dev->of_node, "mdio"); if (of_device_is_available(mdio)) { bus->name = "qca8k slave mii"; bus->read = qca8k_internal_mdio_read; bus->write = qca8k_internal_mdio_write; - return devm_of_mdiobus_register(priv->dev, bus, mdio); + err = devm_of_mdiobus_register(priv->dev, bus, mdio); + goto out_put_node; } /* If a mapping can't be found the legacy mapping is used, @@ -869,7 +874,13 @@ qca8k_mdio_register(struct qca8k_priv *priv) bus->name = "qca8k-legacy slave mii"; bus->read = qca8k_legacy_mdio_read; bus->write = qca8k_legacy_mdio_write; - return devm_mdiobus_register(priv->dev, bus); + + err = devm_mdiobus_register(priv->dev, bus); + +out_put_node: + of_node_put(mdio); + + return err; } static int @@ -1924,12 +1935,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 1917da784..5a274b99f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig" source "drivers/net/ethernet/tundra/Kconfig" source "drivers/net/ethernet/vertexcom/Kconfig" source "drivers/net/ethernet/via/Kconfig" +source "drivers/net/ethernet/wangxun/Kconfig" source "drivers/net/ethernet/wiznet/Kconfig" source "drivers/net/ethernet/xilinx/Kconfig" source "drivers/net/ethernet/xircom/Kconfig" diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 28c9b6f1a..5acb3e16b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -953,8 +953,6 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; unsigned int tx_ring_idx, rx_ring_idx; - struct aq_ring_s *hwts; - struct aq_ring_s *ring; int err; if (!aq_ptp) @@ -962,29 +960,23 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) tx_ring_idx = aq_ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); - ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic, - tx_ring_idx, &aq_nic->aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + err = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic, + tx_ring_idx, &aq_nic->aq_nic_cfg); + if (err) goto err_exit; - } rx_ring_idx = aq_ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); - ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic, - rx_ring_idx, &aq_nic->aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + err = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic, + rx_ring_idx, &aq_nic->aq_nic_cfg); + if (err) goto err_exit_ptp_tx; - } - hwts = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX, - aq_nic->aq_nic_cfg.rxds, - aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size); - if (!hwts) { - err = -ENOMEM; + err = aq_ring_hwts_rx_alloc(&aq_ptp->hwts_rx, aq_nic, PTP_HWST_RING_IDX, + aq_nic->aq_nic_cfg.rxds, + aq_nic->aq_nic_cfg.aq_hw_caps->rxd_size); + if (err) goto err_exit_ptp_rx; - } err = aq_ptp_skb_ring_init(&aq_ptp->skb_ring, aq_nic->aq_nic_cfg.rxds); if (err != 0) { @@ -1001,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1019,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4d9d7d1ed..0eaaba3a1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -132,8 +132,8 @@ static int aq_get_rxpages(struct aq_ring_s *self, struct aq_ring_buff_s *rxbuf) return 0; } -static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic) +static int aq_ring_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic) { int err = 0; @@ -156,46 +156,29 @@ static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self, err_exit: if (err < 0) { aq_ring_free(self); - self = NULL; } - return self; + return err; } -struct aq_ring_s *aq_ring_tx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg) +int aq_ring_tx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; - self->aq_nic = aq_nic; self->idx = idx; self->size = aq_nic_cfg->txds; self->dx_size = aq_nic_cfg->aq_hw_caps->txd_size; - self = aq_ring_alloc(self, aq_nic); - if (!self) { - err = -ENOMEM; - goto err_exit; - } - -err_exit: - if (err < 0) { - aq_ring_free(self); - self = NULL; - } - - return self; + return aq_ring_alloc(self, aq_nic); } -struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg) +int aq_ring_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg) { - int err = 0; - self->aq_nic = aq_nic; self->idx = idx; self->size = aq_nic_cfg->rxds; @@ -217,22 +200,10 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, self->tail_size = 0; } - self = aq_ring_alloc(self, aq_nic); - if (!self) { - err = -ENOMEM; - goto err_exit; - } - -err_exit: - if (err < 0) { - aq_ring_free(self); - self = NULL; - } - - return self; + return aq_ring_alloc(self, aq_nic); } -struct aq_ring_s * +int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size) { @@ -250,10 +221,10 @@ aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, GFP_KERNEL); if (!self->dx_ring) { aq_ring_free(self); - return NULL; + return -ENOMEM; } - return self; + return 0; } int aq_ring_init(struct aq_ring_s *self, const enum atl_ring_type ring_type) @@ -948,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 0a6c34438..d627ace85 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -183,14 +183,14 @@ static inline unsigned int aq_ring_avail_dx(struct aq_ring_s *self) self->sw_head - self->sw_tail - 1); } -struct aq_ring_s *aq_ring_tx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg); -struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, - unsigned int idx, - struct aq_nic_cfg_s *aq_nic_cfg); +int aq_ring_tx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg); +int aq_ring_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, + unsigned int idx, + struct aq_nic_cfg_s *aq_nic_cfg); int aq_ring_init(struct aq_ring_s *self, const enum atl_ring_type ring_type); void aq_ring_rx_deinit(struct aq_ring_s *self); @@ -207,9 +207,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget); int aq_ring_rx_fill(struct aq_ring_s *self); -struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self, - struct aq_nic_s *aq_nic, unsigned int idx, - unsigned int size, unsigned int dx_size); +int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, unsigned int idx, + unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f5db1c44e..9769ab4f9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -136,35 +136,32 @@ int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic, const unsigned int idx_ring = AQ_NIC_CFG_TCVEC2RING(aq_nic_cfg, i, idx); - ring = aq_ring_tx_alloc(&self->ring[i][AQ_VEC_TX_ID], aq_nic, - idx_ring, aq_nic_cfg); - if (!ring) { - err = -ENOMEM; + ring = &self->ring[i][AQ_VEC_TX_ID]; + err = aq_ring_tx_alloc(ring, aq_nic, idx_ring, aq_nic_cfg); + if (err) goto err_exit; - } ++self->tx_rings; aq_nic_set_tx_ring(aq_nic, idx_ring, ring); - if (xdp_rxq_info_reg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq, + ring = &self->ring[i][AQ_VEC_RX_ID]; + if (xdp_rxq_info_reg(&ring->xdp_rxq, aq_nic->ndev, idx, self->napi.napi_id) < 0) { err = -ENOMEM; goto err_exit; } - if (xdp_rxq_info_reg_mem_model(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq, + if (xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL) < 0) { - xdp_rxq_info_unreg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq); + xdp_rxq_info_unreg(&ring->xdp_rxq); err = -ENOMEM; goto err_exit; } - ring = aq_ring_rx_alloc(&self->ring[i][AQ_VEC_RX_ID], aq_nic, - idx_ring, aq_nic_cfg); - if (!ring) { - xdp_rxq_info_unreg(&self->ring[i][AQ_VEC_RX_ID].xdp_rxq); - err = -ENOMEM; + err = aq_ring_rx_alloc(ring, aq_nic, idx_ring, aq_nic_cfg); + if (err) { + xdp_rxq_info_unreg(&ring->xdp_rxq); goto err_exit; } diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 588d64819..e84e944d7 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -350,6 +350,7 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, /* Validates and prepares `skb` for TSO. * * Returns header length, or < 0 if invalid. + * Warning : Might change skb->head (and thus skb_shinfo). */ static int gve_prep_tso(struct sk_buff *skb) { @@ -451,8 +452,8 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, struct sk_buff *skb) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); const bool is_gso = skb_is_gso(skb); + struct skb_shared_info *shinfo; u32 desc_idx = tx->dqo_tx.tail; struct gve_tx_pending_packet_dqo *pkt; @@ -477,6 +478,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, desc_idx = (desc_idx + 1) & tx->mask; } + /* Must get after gve_prep_tso(), which can change shinfo. */ + shinfo = skb_shinfo(skb); gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx, &metadata); desc_idx = (desc_idx + 1) & tx->mask; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 63d43ef86..d8a7fb21b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5333,7 +5333,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { + for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) { ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) @@ -13569,9 +13569,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c7d761426..ed4be80fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2603,6 +2603,14 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) int aq_ret = 0; int i; + if (vf->is_disabled_from_host) { + aq_ret = -EPERM; + dev_info(&pf->pdev->dev, + "Admin has disabled VF %d, will not enable queues\n", + vf->vf_id); + goto error_param; + } + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -2838,6 +2846,24 @@ error_param: (u8 *)&stats, sizeof(stats)); } +/** + * i40e_can_vf_change_mac + * @vf: pointer to the VF info + * + * Return true if the VF is allowed to change its MAC filters, false otherwise + */ +static bool i40e_can_vf_change_mac(struct i40e_vf *vf) +{ + /* If the VF MAC address has been set administratively (via the + * ndo_set_vf_mac command), then deny permission to the VF to + * add/delete unicast MAC addresses, unless the VF is trusted + */ + if (vf->pf_set_mac && !vf->trusted) + return false; + + return true; +} + #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2897,8 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !is_multicast_ether_addr(addr) && vf->pf_set_mac && + if (!i40e_can_vf_change_mac(vf) && + !is_multicast_ether_addr(addr) && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); @@ -3041,19 +3067,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } - if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) - was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ - for (i = 0; i < al->num_elements; i++) + for (i = 0; i < al->num_elements; i++) { + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr) && + i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -4656,9 +4692,12 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) struct i40e_link_status *ls = &pf->hw.phy.link_info; struct virtchnl_pf_event pfe; struct i40e_hw *hw = &pf->hw; + struct i40e_vsi *vsi; + unsigned long q_map; struct i40e_vf *vf; int abs_vf_id; int ret = 0; + int tmp; if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); @@ -4681,17 +4720,38 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) switch (link) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; vf->link_up = true; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; vf->link_up = false; i40e_set_vf_link_state(vf, &pfe, ls); + + vsi = pf->vsi[vf->lan_vsi_idx]; + q_map = BIT(vsi->num_queue_pairs) - 1; + + vf->is_disabled_from_host = true; + + /* Try to stop both Tx&Rx rings even if one of the calls fails + * to ensure we stop the rings even in case of errors. + * If any of them returns with an error then the first + * error that occurred will be returned. + */ + tmp = i40e_ctrl_vf_tx_rings(vsi, q_map, false); + ret = i40e_ctrl_vf_rx_rings(vsi, q_map, false); + + ret = tmp ? tmp : ret; break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index bd497cc53..97e9c34d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -98,6 +98,7 @@ struct i40e_vf { bool link_forced; bool link_up; /* only valid if VF link is forced */ bool spoofchk; + bool is_disabled_from_host; /* PF ctrl of VF enable/disable */ u16 num_vlan; /* ADq related variables */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1bdc70aa9..fe48164dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -413,10 +413,10 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2) #define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3 #define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH 0x0U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP 0x1U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR 0x2U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING 0x3U u8 inner_vlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ @@ -482,11 +482,11 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) #define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 -#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M GENMASK(7, 6) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ 0x0U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ 0x1U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR 0x2U +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_JHASH 0x3U u8 q_opt_tc; #define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 #define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c051503c3..cc6c04a69 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -878,7 +878,8 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) */ if (ice_is_dvm_ena(hw)) { ctxt->info.inner_vlan_flags |= - ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); ctxt->info.outer_vlan_flags = (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & @@ -1085,12 +1086,12 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; - hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; break; case ICE_VSI_VF: /* VF VSI will gets a small RSS table which is a VSI LUT type */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; break; default: dev_dbg(dev, "Unsupported VSI type %s\n", diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ab46cfca4..3117f6525 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7681,6 +7681,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 2b4c791b6..4b71392f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; @@ -817,8 +810,8 @@ static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) int status; lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_XOR : - ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { @@ -826,11 +819,9 @@ static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) goto error_param; } - ctx->info.q_opt_rss = ((lut_type << - ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & - ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - (hash_type & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + ctx->info.q_opt_rss = + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); /* Preserve existing queueing option setting */ ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7..63e83e8b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c index 5b4a0abb4..239266e9d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c @@ -131,6 +131,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) { struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; + u8 *ivf; int err; /* do not allow modifying VLAN stripping when a port VLAN is configured @@ -143,19 +144,24 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) if (!ctxt) return -ENOMEM; + ivf = &ctxt->info.inner_vlan_flags; + /* Here we are configuring what the VSI should do with the VLAN tag in * the Rx packet. We can either leave the tag in the packet or put it in * the Rx descriptor. */ - if (ena) + if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH; - else + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH); + } else { /* Disable stripping. Leave tag in packet */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); + } /* Allow all packets untagged/tagged */ - ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; + *ivf |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 41ee081eb..48cf24709 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -171,6 +171,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -187,13 +191,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -256,11 +257,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); free_buf: kfree(qg_buf); return err; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 07171e574..36e62197f 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -976,7 +976,7 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); /* adjust timestamp for the TX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_TX_LATENCY_10; @@ -1022,6 +1022,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, ktime_t *timestamp) { struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; struct skb_shared_hwtstamps ts; __le64 *regval = (__le64 *)va; int adjust = 0; @@ -1041,7 +1042,7 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1])); /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == e1000_i210) { + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { switch (adapter->link_speed) { case SPEED_10: adjust = IGB_I210_RX_LATENCY_10; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4b6f882b3..e052f49cc 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6330,7 +6330,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6346,16 +6346,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6363,7 +6362,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 100388968..3d56481e1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -123,14 +123,14 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) if (ret_val) return ret_val; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* Check to see if SFP+ module is supported */ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, &data_offset); if (ret_val) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; break; default: break; @@ -213,7 +213,7 @@ static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -283,7 +283,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) /* Validate the water mark configuration */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -292,7 +292,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -369,7 +369,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -438,7 +438,7 @@ static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autonegotiation did not complete.\n"); } } @@ -478,7 +478,7 @@ static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { hw_dbg(hw, "Link was indicated but link is down\n"); - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -594,7 +594,7 @@ static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Set KX4/KX support according to speed requested */ else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || @@ -701,9 +701,9 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) /* Init PHY and function pointers, perform SFP setup */ phy_status = hw->phy.ops.init(hw); - if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (phy_status == -EOPNOTSUPP) return phy_status; - if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + if (phy_status == -ENOENT) goto mac_reset_top; hw->phy.ops.reset(hw); @@ -727,7 +727,7 @@ mac_reset_top: udelay(1); } if (ctrl & IXGBE_CTRL_RST) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -789,7 +789,7 @@ static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); @@ -814,7 +814,7 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); @@ -845,7 +845,7 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, u32 vftabyte; if (vlan > 4095) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Determine 32-bit word position in array */ regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ @@ -964,7 +964,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, gssr = IXGBE_GSSR_PHY0_SM; if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->phy.type == ixgbe_phy_nl) { /* @@ -993,7 +993,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { hw_dbg(hw, "EEPROM read did not pass.\n"); - status = IXGBE_ERR_SFP_NOT_PRESENT; + status = -ENOENT; goto out; } @@ -1003,7 +1003,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, *eeprom_data = (u8)(sfp_data >> 8); } else { - status = IXGBE_ERR_PHY; + status = -EIO; } out: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 58ea959a4..339e106a5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -117,7 +117,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->eeprom.ops.read(hw, ++data_offset, &data_value)) goto setup_sfp_err; @@ -144,7 +144,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) if (ret_val) { hw_dbg(hw, " sfp module setup not complete\n"); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } } @@ -159,7 +159,7 @@ setup_sfp_err: usleep_range(hw->eeprom.semaphore_delay * 1000, hw->eeprom.semaphore_delay * 2000); hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } /** @@ -184,7 +184,7 @@ static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked, ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; *locked = true; } @@ -219,7 +219,7 @@ static s32 prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; locked = true; } @@ -400,7 +400,7 @@ static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } if (hw->phy.multispeed_fiber) { @@ -541,7 +541,7 @@ static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -794,7 +794,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ if (hw->mac.orig_link_settings_stored) @@ -861,8 +861,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = - IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -927,7 +926,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Setup SFP module if there is one present. */ @@ -936,7 +935,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -974,7 +973,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -1093,7 +1092,7 @@ static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd) udelay(10); } - return IXGBE_ERR_FDIR_CMD_INCOMPLETE; + return -EIO; } /** @@ -1155,7 +1154,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) } if (i >= IXGBE_FDIR_INIT_DONE_POLL) { hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); - return IXGBE_ERR_FDIR_REINIT_FAILED; + return -EIO; } /* Clear FDIR statistics registers (read to clear) */ @@ -1387,7 +1386,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flow type input\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* configure FDIRCMD register */ @@ -1546,7 +1545,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on vm pool mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { @@ -1555,14 +1554,14 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, if (input_mask->formatted.dst_port || input_mask->formatted.src_port) { hw_dbg(hw, " Error on src/dst port mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } break; case IXGBE_ATR_L4TYPE_MASK: break; default: hw_dbg(hw, " Error on flow type mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) { @@ -1583,7 +1582,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on VLAN mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch ((__force u16)input_mask->formatted.flex_bytes & 0xFFFF) { @@ -1595,7 +1594,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flexible byte mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ @@ -1824,7 +1823,7 @@ static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) /* Return error if SFP module has been detected but is not supported */ if (hw->phy.type == ixgbe_phy_sfp_unsupported) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; return status; } @@ -1863,13 +1862,13 @@ static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) * Verifies that installed the firmware version is 0.6 or higher * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. * - * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or - * if the FW version is not supported. + * Return: -EACCES if the FW is not present or if the FW version is + * not supported. **/ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_EEPROM_VERSION; u16 fw_offset, fw_ptp_cfg_offset; + s32 status = -EACCES; u16 offset; u16 fw_version = 0; @@ -1883,7 +1882,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_offset == 0 || fw_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the offset to the Pass Through Patch Configuration block */ offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR; @@ -1891,7 +1890,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_ptp_cfg_offset == 0 || fw_ptp_cfg_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the firmware version */ offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4; @@ -1905,7 +1904,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) fw_version_err: hw_err(hw, "eeprom read at offset %d failed\n", offset); - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; } /** @@ -2038,7 +2037,7 @@ static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw) if (!(anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)) { hw_dbg(hw, "auto negotiation not completed\n"); - ret_val = IXGBE_ERR_RESET_FAILED; + ret_val = -EIO; goto reset_pipeline_out; } @@ -2087,7 +2086,7 @@ static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } @@ -2141,7 +2140,7 @@ static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 38c4609bd..5688a6ad4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -124,7 +124,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* @@ -215,7 +215,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } if (hw->mac.type != ixgbe_mac_X540) { @@ -500,7 +500,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (pba_num == NULL) { hw_dbg(hw, "PBA string buffer was null\n"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); @@ -526,7 +526,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, /* we will need 11 characters to store the PBA */ if (pba_num_size < 11) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* extract hex string from data and pba_ptr */ @@ -563,13 +563,13 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (length == 0xFFFF || length == 0) { hw_dbg(hw, "NVM PBA number section invalid length\n"); - return IXGBE_ERR_PBA_SECTION; + return -EIO; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* trim pba length from start of string */ @@ -805,7 +805,7 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -826,7 +826,7 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn off the LED, set mode to OFF. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -904,11 +904,8 @@ s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * The EEPROM page size cannot be queried from the chip. We do lazy @@ -962,7 +959,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1028,7 +1025,7 @@ s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); } @@ -1050,11 +1047,8 @@ s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * We cannot hold synchronization semaphores for too long @@ -1099,7 +1093,7 @@ static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1142,7 +1136,7 @@ s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); } @@ -1165,11 +1159,8 @@ s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1262,11 +1253,8 @@ s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1328,7 +1316,7 @@ static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) } udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -1344,7 +1332,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) u32 i; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); @@ -1366,7 +1354,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) hw_dbg(hw, "Could not acquire EEPROM grant\n"); hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Setup EEPROM for Read/Write */ @@ -1419,7 +1407,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw)); if (swsm & IXGBE_SWSM_SMBI) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } } @@ -1447,7 +1435,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) if (i >= timeout) { hw_dbg(hw, "SWESMBI Software EEPROM semaphore not granted.\n"); ixgbe_release_eeprom_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1503,7 +1491,7 @@ static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) */ if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { hw_dbg(hw, "SPI EEPROM Status error\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1715,7 +1703,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { if (hw->eeprom.ops.read(hw, i, &pointer)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* If the pointer seems invalid */ @@ -1724,7 +1712,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) if (hw->eeprom.ops.read(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } if (length == 0xFFFF || length == 0) @@ -1733,7 +1721,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (hw->eeprom.ops.read(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -1786,7 +1774,7 @@ s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; /* If the user cares, return the calculated checksum */ if (checksum_val) @@ -1845,7 +1833,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* setup VMDq pool selection before this RAR gets enabled */ @@ -1897,7 +1885,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* @@ -2146,7 +2134,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) /* Validate the water mark configuration. */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -2155,7 +2143,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -2212,7 +2200,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -2269,7 +2257,7 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EINVAL; if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { /* @@ -2321,7 +2309,7 @@ static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); @@ -2353,12 +2341,12 @@ static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) */ links = IXGBE_READ_REG(hw, IXGBE_LINKS); if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; if (hw->mac.type == ixgbe_mac_82599EB) { links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; } /* * Read the 10g AN autoc and LP ability registers and resolve @@ -2407,8 +2395,8 @@ static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) **/ void ixgbe_fc_autoneg(struct ixgbe_hw *hw) { - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 ret_val = -EIO; bool link_up; /* @@ -2510,7 +2498,7 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Disables PCI-Express primary access and verifies there are no pending - * requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable + * requests. -EALREADY is returned if primary disable * bit hasn't caused the primary requests to be disabled, else 0 * is returned signifying primary requests disabled. **/ @@ -2575,7 +2563,7 @@ gio_disable_fail: } hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - return IXGBE_ERR_PRIMARY_REQUESTS_PENDING; + return -EALREADY; } /** @@ -2600,7 +2588,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_eeprom_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); if (!(gssr & (fwmask | swmask))) { @@ -2620,7 +2608,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) ixgbe_release_swfw_sync(hw, gssr & (fwmask | swmask)); usleep_range(5000, 10000); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -2757,7 +2745,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * Link must be up to auto-blink the LEDs; @@ -2803,7 +2791,7 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val) @@ -2963,7 +2951,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); @@ -3014,7 +3002,7 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } if (vmdq < 32) { @@ -3091,7 +3079,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) * will simply bypass the VLVF if there are no entries present in the * VLVF that contain our VLAN */ - first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0; + first_empty_slot = vlvf_bypass ? -ENOSPC : 0; /* add VLAN enable bit for comparison */ vlan |= IXGBE_VLVF_VIEN; @@ -3115,7 +3103,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) if (!first_empty_slot) hw_dbg(hw, "No space in VLVF.\n"); - return first_empty_slot ? : IXGBE_ERR_NO_SPACE; + return first_empty_slot ? : -ENOSPC; } /** @@ -3135,7 +3123,7 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, s32 vlvf_index; if ((vlan > 4095) || (vind > 63)) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * this is a 2 part operation - first the VFTA, then the @@ -3596,7 +3584,8 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) * * Communicates with the manageability block. On success return 0 * else returns semaphore error when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * semaphore, -EINVAL when incorrect parameters passed or -EIO when + * command fails. * * This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held * by the caller. @@ -3609,7 +3598,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Set bit 9 of FWSTS clearing FW reset indication */ @@ -3620,13 +3609,13 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, hicr = IXGBE_READ_REG(hw, IXGBE_HICR); if (!(hicr & IXGBE_HICR_EN)) { hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } /* Calculate length in DWORDs. We must be DWORD aligned */ if (length % sizeof(u32)) { hw_dbg(hw, "Buffer length failure, not aligned to dword"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } dword_len = length >> 2; @@ -3651,7 +3640,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, /* Check command successful completion. */ if ((timeout && i == timeout) || !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -3671,7 +3660,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, * in these cases. * * Communicates with the manageability block. On success return 0 - * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. + * else return -EIO or -EINVAL. **/ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, u32 length, u32 timeout, @@ -3686,7 +3675,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Take management host interface semaphore */ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); @@ -3716,7 +3705,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (length < round_up(buf_len, 4) + hdr_size) { hw_dbg(hw, "Buffer not large enough for reply message.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; + status = -EIO; goto rel_out; } @@ -3747,8 +3736,8 @@ rel_out: * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore or -EIO when command fails. **/ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, __always_unused u16 len, @@ -3784,7 +3773,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, FW_CEM_RESP_STATUS_SUCCESS) ret_val = 0; else - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + ret_val = -EIO; break; } @@ -3882,14 +3871,14 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg, return status; if ((*ets_offset == 0x0000) || (*ets_offset == 0xFFFF)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = hw->eeprom.ops.read(hw, *ets_offset, ets_cfg); if (status) return status; if ((*ets_cfg & IXGBE_ETS_TYPE_MASK) != IXGBE_ETS_TYPE_EMC_SHIFTED) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; return 0; } @@ -3912,7 +3901,7 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) @@ -3972,7 +3961,7 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0051aa676..f8e65e182 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3339,7 +3339,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + s32 status = -EFAULT; u8 databyte = 0xFF; int i = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9e0e13638..086cc2573 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2764,7 +2764,6 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 eicr = adapter->interrupt_event; - s32 rc; if (test_bit(__IXGBE_DOWN, &adapter->state)) return; @@ -2798,14 +2797,13 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) } /* Check if this is not due to overtemp */ - if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: - rc = hw->phy.ops.check_overtemp(hw); - if (rc != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; default: @@ -2949,8 +2947,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -5520,7 +5518,7 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { u32 speed; bool autoneg, link_up = false; - int ret = IXGBE_ERR_LINK_SETUP; + int ret = -EIO; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -5991,13 +5989,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) err = hw->mac.ops.init_hw(hw); switch (err) { case 0: - case IXGBE_ERR_SFP_NOT_PRESENT: - case IXGBE_ERR_SFP_NOT_SUPPORTED: + case -ENOENT: + case -EOPNOTSUPP: break; - case IXGBE_ERR_PRIMARY_REQUESTS_PENDING: + case -EALREADY: e_dev_err("primary disable timed out\n"); break; - case IXGBE_ERR_EEPROM_VERSION: + case -EACCES: /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated with " @@ -7837,10 +7835,10 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) adapter->sfp_poll_time = jiffies + IXGBE_SFP_POLL_JIFFIES - 1; err = hw->phy.ops.identify_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { /* If no cable is present, then we need to reset * the next time we find a good cable. */ adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET; @@ -7866,7 +7864,7 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) else err = hw->mac.ops.setup_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; @@ -7875,8 +7873,8 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) sfp_out: clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - if ((err == IXGBE_ERR_SFP_NOT_SUPPORTED) && - (adapter->netdev->reg_state == NETREG_REGISTERED)) { + if (err == -EOPNOTSUPP && + adapter->netdev->reg_state == NETREG_REGISTERED) { e_dev_err("failed to initialize because an unsupported " "SFP+ module type was detected.\n"); e_dev_err("Reload the driver after installing a " @@ -7946,7 +7944,7 @@ static void ixgbe_service_timer(struct timer_list *t) static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 status; + bool overtemp; if (!(adapter->flags2 & IXGBE_FLAG2_PHY_INTERRUPT)) return; @@ -7956,11 +7954,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) if (!hw->phy.ops.handle_lasi) return; - status = hw->phy.ops.handle_lasi(&adapter->hw); - if (status != IXGBE_ERR_OVERTEMP) - return; - - e_crit(drv, "%s\n", ixgbe_overheat_msg); + hw->phy.ops.handle_lasi(&adapter->hw, &overtemp); + if (overtemp) + e_crit(drv, "%s\n", ixgbe_overheat_msg); } static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) @@ -10548,6 +10544,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) } /** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + +/** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure * @ring: ring index @@ -10563,6 +10597,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10571,9 +10610,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10601,9 +10637,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10612,6 +10645,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** @@ -10943,9 +10981,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = hw->mac.ops.reset_hw(hw); hw->phy.reset_if_overtemp = false; ixgbe_set_eee_capable(adapter); - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { err = 0; - } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + } else if (err == -EOPNOTSUPP) { e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n"); e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init; @@ -11161,7 +11199,7 @@ skip_sriov: /* reset the hardware with the new settings */ err = hw->mac.ops.start_hw(hw); - if (err == IXGBE_ERR_EEPROM_VERSION) { + if (err == -EACCES) { /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated " diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index 5679293e5..fe7ef5773 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -24,7 +24,7 @@ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) size = mbx->size; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->read(hw, msg, size, mbx_id); } @@ -43,10 +43,10 @@ s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (size > mbx->size) - return IXGBE_ERR_MBX; + return -EINVAL; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->write(hw, msg, size, mbx_id); } @@ -63,7 +63,7 @@ s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_msg(hw, mbx_id); } @@ -80,7 +80,7 @@ s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_ack(hw, mbx_id); } @@ -97,7 +97,7 @@ s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_rst(hw, mbx_id); } @@ -115,12 +115,12 @@ static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_msg(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -140,12 +140,12 @@ static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_ack(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -169,7 +169,7 @@ static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, s32 ret_val; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; ret_val = ixgbe_poll_for_msg(hw, mbx_id); if (ret_val) @@ -197,7 +197,7 @@ static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, /* exit if either we can't write or there isn't a defined timeout */ if (!mbx->ops || !mbx->timeout) - return IXGBE_ERR_MBX; + return -EIO; /* send msg */ ret_val = mbx->ops->write(hw, msg, size, mbx_id); @@ -217,7 +217,7 @@ static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -238,7 +238,7 @@ static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -259,7 +259,7 @@ static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -295,7 +295,7 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -317,7 +317,7 @@ static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number) if (p2v_mailbox & IXGBE_PFMAILBOX_PFU) return 0; - return IXGBE_ERR_MBX; + return -EIO; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 8f4316b19..6434c190e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -7,7 +7,6 @@ #include "ixgbe_type.h" #define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ -#define IXGBE_ERR_MBX -100 #define IXGBE_VFMAILBOX 0x002FC #define IXGBE_VFMBMEM 0x00200 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 123dca9ce..305afb823 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -102,7 +102,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -150,7 +150,7 @@ fail: hw_dbg(hw, "I2C byte read combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -179,7 +179,7 @@ s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -215,7 +215,7 @@ fail: hw_dbg(hw, "I2C byte write combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -262,8 +262,8 @@ static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) **/ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { + u32 status = -EFAULT; u32 phy_addr; - u32 status = IXGBE_ERR_PHY_ADDR_INVALID; if (!hw->phy.phy_semaphore_mask) { if (hw->bus.lan_id) @@ -282,7 +282,7 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) if (ixgbe_probe_phy(hw, phy_addr)) return 0; else - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; } for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { @@ -408,8 +408,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) return status; /* Don't reset PHY if it's shut down due to overtemp. */ - if (!hw->phy.reset_if_overtemp && - (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) + if (!hw->phy.reset_if_overtemp && hw->phy.ops.check_overtemp(hw)) return 0; /* Blocked by MNG FW so bail */ @@ -457,7 +456,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) if (ctrl & MDIO_CTRL1_RESET) { hw_dbg(hw, "PHY reset polling failed to complete.\n"); - return IXGBE_ERR_RESET_FAILED; + return -EIO; } return 0; @@ -500,7 +499,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address command did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Address cycle complete, setup and write the read @@ -527,7 +526,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY read command didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Read operation is complete. Get the data @@ -559,7 +558,7 @@ s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -604,7 +603,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* @@ -632,7 +631,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY write cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -657,7 +656,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -1303,7 +1302,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) if ((phy_data & MDIO_CTRL1_RESET) != 0) { hw_dbg(hw, "PHY reset did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Get init offsets */ @@ -1360,12 +1359,12 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) hw_dbg(hw, "SOL\n"); } else { hw_dbg(hw, "Bad control value\n"); - return IXGBE_ERR_PHY; + return -EIO; } break; default: hw_dbg(hw, "Bad control type\n"); - return IXGBE_ERR_PHY; + return -EIO; } } @@ -1373,7 +1372,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) err_eeprom: hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -1391,10 +1390,10 @@ s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) return ixgbe_identify_qsfp_module_generic(hw); default: hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1419,7 +1418,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1434,7 +1433,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES, @@ -1625,7 +1624,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } /* Anything else 82598-based is supported */ @@ -1649,7 +1648,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "SFP+ module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1659,7 +1658,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1686,7 +1685,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1700,7 +1699,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_QSFP_PLUS) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } hw->phy.id = identifier; @@ -1768,7 +1767,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } else { /* unsupported module type */ hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } } @@ -1828,7 +1827,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "QSFP module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; } @@ -1839,7 +1838,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1859,14 +1858,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 sfp_type = hw->phy.sfp_type; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* * Limiting active cables and 1G Phys must be initialized as @@ -1887,11 +1886,11 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) { hw_err(hw, "eeprom read at %d failed\n", IXGBE_PHY_INIT_OFFSET_NL); - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; } if ((!*list_offset) || (*list_offset == 0xFFFF)) - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; /* Shift offset to first ID word */ (*list_offset)++; @@ -1910,7 +1909,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, goto err_phy; if ((!*data_offset) || (*data_offset == 0xFFFF)) { hw_dbg(hw, "SFP+ module not supported\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } else { break; } @@ -1923,14 +1922,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (sfp_id == IXGBE_PHY_INIT_END_NL) { hw_dbg(hw, "No matching SFP+ module found\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; err_phy: hw_err(hw, "eeprom read at offset %d failed\n", *list_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -2025,7 +2024,7 @@ static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); @@ -2141,7 +2140,7 @@ static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, u32 swfw_mask = hw->phy.phy_semaphore_mask; if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; do { ixgbe_i2c_start(hw); @@ -2383,7 +2382,7 @@ static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) if (ack == 1) { hw_dbg(hw, "I2C ack was not received.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; } ixgbe_lower_i2c_clk(hw, &i2cctl); @@ -2455,7 +2454,7 @@ static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) udelay(IXGBE_I2C_T_LOW); } else { hw_dbg(hw, "I2C data was not set to %X\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2551,7 +2550,7 @@ static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw)); if (data != ixgbe_get_i2c_data(hw, i2cctl)) { hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2621,22 +2620,24 @@ static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Checks if the LASI temp alarm status was triggered due to overtemp + * + * Return true when an overtemp event detected, otherwise false. **/ -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) { u16 phy_data = 0; + u32 status; if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) - return 0; + return false; /* Check that the LASI temp alarm status was triggered */ - hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, - MDIO_MMD_PMAPMD, &phy_data); - - if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) - return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + MDIO_MMD_PMAPMD, &phy_data); + if (status) + return false; - return IXGBE_ERR_OVERTEMP; + return !!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM); } /** ixgbe_set_copper_phy_power - Control power for copper phy diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 6544c4539..ef72729d7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -155,7 +155,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset); -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data); s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index ea88ac04a..198ab9d97 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1329,7 +1329,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); - retval = IXGBE_ERR_MBX; + retval = -EIO; break; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2b00db92b..61b9774b3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3509,10 +3509,10 @@ struct ixgbe_phy_operations { s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *); s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - s32 (*check_overtemp)(struct ixgbe_hw *); + bool (*check_overtemp)(struct ixgbe_hw *); s32 (*set_phy_power)(struct ixgbe_hw *, bool on); s32 (*enter_lplu)(struct ixgbe_hw *); - s32 (*handle_lasi)(struct ixgbe_hw *hw); + s32 (*handle_lasi)(struct ixgbe_hw *hw, bool *); s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, u8 *value); s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, @@ -3665,45 +3665,6 @@ struct ixgbe_info { const u32 *mvals; }; - -/* Error Codes */ -#define IXGBE_ERR_EEPROM -1 -#define IXGBE_ERR_EEPROM_CHECKSUM -2 -#define IXGBE_ERR_PHY -3 -#define IXGBE_ERR_CONFIG -4 -#define IXGBE_ERR_PARAM -5 -#define IXGBE_ERR_MAC_TYPE -6 -#define IXGBE_ERR_UNKNOWN_PHY -7 -#define IXGBE_ERR_LINK_SETUP -8 -#define IXGBE_ERR_ADAPTER_STOPPED -9 -#define IXGBE_ERR_INVALID_MAC_ADDR -10 -#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING -12 -#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 -#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 -#define IXGBE_ERR_RESET_FAILED -15 -#define IXGBE_ERR_SWFW_SYNC -16 -#define IXGBE_ERR_PHY_ADDR_INVALID -17 -#define IXGBE_ERR_I2C -18 -#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 -#define IXGBE_ERR_SFP_NOT_PRESENT -20 -#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 -#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 -#define IXGBE_ERR_FDIR_REINIT_FAILED -23 -#define IXGBE_ERR_EEPROM_VERSION -24 -#define IXGBE_ERR_NO_SPACE -25 -#define IXGBE_ERR_OVERTEMP -26 -#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 -#define IXGBE_ERR_FC_NOT_SUPPORTED -28 -#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 -#define IXGBE_ERR_PBA_SECTION -31 -#define IXGBE_ERR_INVALID_ARGUMENT -32 -#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 -#define IXGBE_ERR_FDIR_CMD_INCOMPLETE -38 -#define IXGBE_ERR_FW_RESP_INVALID -39 -#define IXGBE_ERR_TOKEN_RETRY -40 -#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF - #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) #define IXGBE_FUSES0_REV_MASK (3u << 6) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index d5cfb51ff..15325c549 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -84,7 +84,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl = IXGBE_CTRL_RST; @@ -103,7 +103,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } msleep(100); @@ -220,7 +220,7 @@ static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_generic(hw, offset, data); @@ -243,7 +243,7 @@ static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_buffer_generic(hw, offset, words, data); @@ -264,7 +264,7 @@ static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_generic(hw, offset, data); @@ -287,7 +287,7 @@ static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_buffer_generic(hw, offset, words, data); @@ -324,7 +324,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (i = 0; i < checksum_last_word; i++) { if (ixgbe_read_eerd_generic(hw, i, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -349,7 +349,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) if (ixgbe_read_eerd_generic(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Skip pointer section if length is invalid. */ @@ -360,7 +360,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (ixgbe_read_eerd_generic(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -397,7 +397,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -418,7 +418,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, */ if (read_checksum != checksum) { hw_dbg(hw, "Invalid EEPROM checksum"); - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; } /* If the user cares, return the calculated checksum */ @@ -455,7 +455,7 @@ static s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -490,7 +490,7 @@ static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) s32 status; status = ixgbe_poll_flash_update_done_X540(hw); - if (status == IXGBE_ERR_EEPROM) { + if (status == -EIO) { hw_dbg(hw, "Flash update time out\n"); return status; } @@ -540,7 +540,7 @@ static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) return 0; udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -575,7 +575,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (!(swfw_sync & (fwmask | swmask | hwmask))) { @@ -599,7 +599,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * bits in the SW_FW_SYNC register. */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (swfw_sync & (fwmask | hwmask)) { swfw_sync |= swmask; @@ -622,11 +622,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) rmask |= IXGBE_GSSR_I2C_MASK; ixgbe_release_swfw_sync_X540(hw, rmask); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -680,7 +680,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) if (i == timeout) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Now get the semaphore between SW/FW through the REGSMP bit */ @@ -697,7 +697,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) */ hw_dbg(hw, "REGSMP Software NVM semaphore not granted\n"); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -768,7 +768,7 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) bool link_up; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Link should be up in order for the blink bit in the LED control * register to work. Force link and speed in the MAC if link is down. @@ -804,7 +804,7 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) u32 ledctl_reg; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Restore the LED to its default value. */ ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index aa4bf6c9a..cdc912bba 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -206,13 +206,13 @@ static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw) } if (retry == IXGBE_CS4227_RETRIES) { hw_err(hw, "CS4227 reset did not complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } status = ixgbe_read_cs4227(hw, IXGBE_CS4227_EEPROM_STATUS, &value); if (status || !(value & IXGBE_CS4227_EEPROM_LOAD_OK)) { hw_err(hw, "CS4227 EEPROM did not load successfully\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -350,13 +350,13 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } /** @@ -463,7 +463,7 @@ s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity, --retries; } while (retries > 0); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } static const struct { @@ -511,7 +511,7 @@ static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw) hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK; hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK; if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; hw->phy.autoneg_advertised = hw->phy.speeds_supported; hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL | @@ -568,7 +568,7 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } switch (hw->fc.requested_mode) { @@ -600,8 +600,10 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup); if (rc) return rc; + if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN) - return IXGBE_ERR_OVERTEMP; + return -EIO; + return 0; } @@ -675,7 +677,7 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) *ctrl = command; if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { hw_dbg(hw, "IOSF wait timed out\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -715,7 +717,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; hw_dbg(hw, "Failed to read, error %x\n", error); - return IXGBE_ERR_PHY; + ret = -EIO; + goto out; } if (!ret) @@ -750,9 +753,9 @@ static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw) if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; - return IXGBE_ERR_TOKEN_RETRY; + return -EAGAIN; } /** @@ -778,7 +781,7 @@ static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw) return status; if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; } /** @@ -942,7 +945,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, local_buffer = buf; } else { if (buffer_size < ptr) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = &buffer[ptr]; } @@ -960,7 +963,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, } if (buffer && ((u32)start + (u32)length > buffer_size)) - return IXGBE_ERR_PARAM; + return -EINVAL; for (i = start; length; i++, length--) { if (i == bufsz && !buffer) { @@ -1012,7 +1015,7 @@ static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, local_buffer = eeprom_ptrs; } else { if (buffer_size < IXGBE_EEPROM_LAST_WORD) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = buffer; } @@ -1148,7 +1151,7 @@ static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) { - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; hw_dbg(hw, "Invalid EEPROM checksum"); } @@ -1203,7 +1206,7 @@ static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data) hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); } else { hw_dbg(hw, "write ee hostif failed to get semaphore"); - status = IXGBE_ERR_SWFW_SYNC; + status = -EBUSY; } return status; @@ -1415,7 +1418,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; hw_dbg(hw, "Failed to write, error %x\n", error); - return IXGBE_ERR_PHY; + return -EIO; } out: @@ -1558,7 +1561,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) /* iXFI is only supported with X552 */ if (mac->type != ixgbe_mac_X550EM_x) - return IXGBE_ERR_LINK_SETUP; + return -EIO; /* Disable AN and force speed to 10G Serial. */ status = ixgbe_read_iosf_sb_reg_x550(hw, @@ -1580,7 +1583,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } status = ixgbe_write_iosf_sb_reg_x550(hw, @@ -1611,7 +1614,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) { switch (hw->phy.sfp_type) { case ixgbe_sfp_type_not_present: - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; case ixgbe_sfp_type_da_cu_core0: case ixgbe_sfp_type_da_cu_core1: *linear = true; @@ -1630,7 +1633,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) case ixgbe_sfp_type_1g_cu_core0: case ixgbe_sfp_type_1g_cu_core1: default: - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1660,7 +1663,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, * there is no reason to configure CS4227 and SFP not present error is * not accepted in the setup MAC link flow. */ - if (status == IXGBE_ERR_SFP_NOT_PRESENT) + if (status == -ENOENT) return 0; if (status) @@ -1718,7 +1721,7 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } (void)mac->ops.write_iosf_sb_reg(hw, @@ -1803,7 +1806,7 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1853,7 +1856,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1863,7 +1866,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, ixgbe_setup_kr_speed_x550em(hw, speed); if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; /* Get external PHY SKU id */ ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU, @@ -1962,7 +1965,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; status = ixgbe_check_mac_link_generic(hw, speed, link_up, link_up_wait_to_complete); @@ -2145,9 +2148,9 @@ static s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed, */ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -2165,7 +2168,7 @@ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) /* Check if auto-negotiation has completed */ status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info); if (status || !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) { - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -2369,18 +2372,18 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, * @hw: pointer to hardware structure * @lsc: pointer to boolean flag which indicates whether external Base T * PHY interrupt is lsc + * @is_overtemp: indicate whether an overtemp event encountered * * Determime if external Base T PHY interrupt cause is high temperature * failure alarm or link status change. - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) +static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc, + bool *is_overtemp) { u32 status; u16 reg; + *is_overtemp = false; *lsc = false; /* Vendor alarm triggered */ @@ -2412,7 +2415,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg & IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL) { /* power down the PHY in case the PHY FW didn't already */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { /* device fault alarm triggered */ @@ -2426,7 +2430,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { /* power down the PHY in case the PHY FW didn't */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } } @@ -2462,12 +2467,12 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) **/ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) { + bool lsc, overtemp; u32 status; u16 reg; - bool lsc; /* Clear interrupt flags */ - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, &overtemp); /* Enable link status change alarm */ @@ -2546,21 +2551,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) /** * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt * @hw: pointer to hardware structure + * @is_overtemp: indicate whether an overtemp event encountered * * Handle external Base T PHY interrupt. If high temperature * failure alarm then return error, else if link status change * then setup internal/external PHY link - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw) +static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw, + bool *is_overtemp) { struct ixgbe_phy_info *phy = &hw->phy; bool lsc; u32 status; - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, is_overtemp); if (status) return status; @@ -2692,7 +2696,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) u16 speed; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; if (!(hw->mac.type == ixgbe_mac_X550EM_x && !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) { @@ -2735,7 +2739,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) break; default: /* Internal PHY does not support anything else */ - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } return ixgbe_setup_ixfi_x550em(hw, &force_speed); @@ -2767,7 +2771,7 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2789,7 +2793,7 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2813,8 +2817,9 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore, -EIO when command fails or -ENIVAL when incorrect + * params passed. **/ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, u16 len, @@ -2825,7 +2830,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, int i; if (!len || !driver_ver || (len > sizeof(fw_cmd.driver_string))) - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len; @@ -2850,7 +2855,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, if (fw_cmd.hdr.cmd_or_resp.ret_status != FW_CEM_RESP_STATUS_SUCCESS) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -2907,7 +2912,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* 10gig parts do not have a word in the EEPROM to determine the @@ -2942,7 +2947,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (hw->device_id) { @@ -2986,8 +2991,8 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) { u32 link_s1, lp_an_page_low, an_cntl_1; - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -3013,7 +3018,7 @@ static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { hw_dbg(hw, "Auto-Negotiation did not complete\n"); - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -3187,21 +3192,23 @@ static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw) /** * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp * @hw: pointer to hardware structure + * + * Return true when an overtemp event detected, otherwise false. */ -static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) +static bool ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) { u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store); if (rc) - return rc; + return false; if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) { ixgbe_shutdown_fw_phy(hw); - return IXGBE_ERR_OVERTEMP; + return true; } - return 0; + return false; } /** @@ -3251,8 +3258,7 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || - ret_val == IXGBE_ERR_PHY_ADDR_INVALID) + if (ret_val == -EOPNOTSUPP || ret_val == -EFAULT) return ret_val; /* Setup function pointers based on detected hardware */ @@ -3460,8 +3466,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || - status == IXGBE_ERR_PHY_ADDR_INVALID) + if (status == -EOPNOTSUPP || status == -EFAULT) return status; /* start the external PHY */ @@ -3477,7 +3482,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -3501,7 +3506,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); @@ -3519,7 +3524,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -3615,7 +3620,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } if (hw->fc.requested_mode == ixgbe_fc_default) @@ -3672,7 +3677,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } status = hw->mac.ops.write_iosf_sb_reg(hw, @@ -3768,7 +3773,7 @@ static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask) return 0; if (hmask) ixgbe_release_swfw_sync_X540(hw, hmask); - if (status != IXGBE_ERR_TOKEN_RETRY) + if (status != -EAGAIN) return status; msleep(FW_PHY_TOKEN_DELAY); } @@ -3812,7 +3817,7 @@ static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->phy.ops.read_reg_mdi(hw, reg_addr, device_type, phy_data); @@ -3838,7 +3843,7 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, mask); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0bcf3e559..55639c133 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -437,6 +437,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); @@ -2678,18 +2682,17 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, rsp->entry = NPC_MCAM_ENTRY_INVALID; rsp->free_count = 0; - /* Check if ref_entry is within range */ - if (req->priority && req->ref_entry >= mcam->bmap_entries) { - dev_err(rvu->dev, "%s: reference entry %d is out of range\n", - __func__, req->ref_entry); - return NPC_MCAM_INVALID_REQ; - } + /* Check if ref_entry is greater that the range + * then set it to max value. + */ + if (req->ref_entry > mcam->bmap_entries) + req->ref_entry = mcam->bmap_entries; /* ref_entry can't be '0' if requested priority is high. * Can't be last entry if requested priority is low. */ if ((!req->ref_entry && req->priority == NPC_MCAM_HIGHER_PRIO) || - ((req->ref_entry == (mcam->bmap_entries - 1)) && + ((req->ref_entry == mcam->bmap_entries) && req->priority == NPC_MCAM_LOWER_PRIO)) return NPC_MCAM_INVALID_REQ; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 0f896f606..c00d6d67d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -930,8 +930,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (pfvf->ptp && qidx < pfvf->hw.tx_queues) { err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, sizeof(*sq->timestamps)); - if (err) + if (err) { + kfree(sq->sg); + sq->sg = NULL; return err; + } } sq->head = 0; @@ -947,7 +950,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->stats.bytes = 0; sq->stats.pkts = 0; - return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + if (err) { + kfree(sq->sg); + sq->sg = NULL; + return err; + } + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3b9ba8fa2..41eac7dfb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -52,8 +52,10 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, max_regions = max_tcam_regions; tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) - return -ENOMEM; + if (!tcam->used_regions) { + err = -ENOMEM; + goto err_alloc_used_regions; + } tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); @@ -65,6 +67,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + tcam->max_group_size = min_t(unsigned int, tcam->max_group_size, + MLXSW_REG_PAGT_ACL_MAX_NUM); err = ops->init(mlxsw_sp, tcam->priv, tcam); if (err) @@ -76,6 +80,8 @@ err_tcam_init: bitmap_free(tcam->used_groups); err_alloc_used_groups: bitmap_free(tcam->used_regions); +err_alloc_used_regions: + mutex_destroy(&tcam->lock); return err; } @@ -84,10 +90,10 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - mutex_destroy(&tcam->lock); ops->fini(mlxsw_sp, tcam->priv); bitmap_free(tcam->used_groups); bitmap_free(tcam->used_regions); + mutex_destroy(&tcam->lock); } int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c index 41fa2523d..5f2cd9a8c 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c @@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) { - struct net_device *bond = lan966x->ports[lag]->bond; + struct lan966x_port *port = lan966x->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!port || !port->bond || (visited & BIT(lag))) continue; + bond = port->bond; bond_mask = lan966x_lag_get_mask(lan966x, bond); for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) { struct lan966x_port *port = lan966x->ports[p]; + if (!port) + continue; + lan_wr(ANA_PGID_PGID_SET(bond_mask), lan966x, ANA_PGID(p)); if (port->lag_tx_active) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 0050fcb98..8cc517283 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918..75868b3f5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 3423c95cc..7031f4128 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -744,6 +744,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 7a83222ca..cb3173d2b 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -278,6 +278,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3..ac7e1cffb 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index f7492be45..497766ecd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1243,7 +1243,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. */ - if (post_ct_entry->chain_index != pre_ct_entry->chain_index) + if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index) return -EINVAL; err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); @@ -1379,10 +1379,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: @@ -1756,7 +1776,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv, if (IS_ERR(ct_entry)) return PTR_ERR(ct_entry); ct_entry->type = CT_TYPE_PRE_CT; - ct_entry->chain_index = ct_goto->chain_index; + ct_entry->chain_index = flow->common.chain_index; + ct_entry->goto_chain_index = ct_goto->chain_index; list_add(&ct_entry->list_node, &zt->pre_ct_list); zt->pre_ct_count++; @@ -1776,9 +1797,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { @@ -1803,6 +1845,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, ct_entry->type = CT_TYPE_POST_CT; ct_entry->chain_index = flow->common.chain_index; + ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO); + ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0; list_add(&ct_entry->list_node, &zt->post_ct_list); zt->post_ct_count++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index 762c0b36e..9440ab776 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -112,6 +112,7 @@ enum nfp_nfp_layer_name { * @cookie: Flow cookie, same as original TC flow, used as key * @list_node: Used by the list * @chain_index: Chain index of the original flow + * @goto_chain_index: goto chain index of the flow * @netdev: netdev structure. * @type: Type of pre-entry from enum ct_entry_type * @zt: Reference to the zone table this belongs to @@ -125,6 +126,7 @@ struct nfp_fl_ct_flow_entry { unsigned long cookie; struct list_head list_node; u32 chain_index; + u32 goto_chain_index; enum ct_entry_type type; struct net_device *netdev; struct nfp_fl_ct_zone_entry *zt; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 52f67157b..a3c52c91a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -980,7 +980,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); - if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { if (entry->bridge_count || !nfp_flower_is_supported_bridge(netdev)) { nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 33b4c2856..3f10c5365 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) const u32 barcfg_msix_general = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); const u32 barcfg_msix_xpb = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( NFP_CPP_TARGET_ISLAND_XPB); const u32 barcfg_explicit[4] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index ce436e973..4b9caec6e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -380,6 +380,10 @@ static void ionic_remove(struct pci_dev *pdev) del_timer_sync(&ionic->watchdog_timer); if (ionic->lif) { + /* prevent adminq cmds if already known as down */ + if (test_and_clear_bit(IONIC_LIF_F_FW_RESET, ionic->lif->state)) + set_bit(IONIC_LIF_F_FW_STOPPING, ionic->lif->state); + ionic_lif_unregister(ionic->lif); ionic_devlink_unregister(ionic); ionic_lif_deinit(ionic->lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 344a39246..7adfcd7c2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -293,6 +293,7 @@ void ionic_dev_cmd_comp(struct ionic_dev *idev, union ionic_dev_cmd_comp *comp) void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) { + idev->opcode = cmd->cmd.opcode; memcpy_toio(&idev->dev_cmd_regs->cmd, cmd, sizeof(*cmd)); iowrite32(0, &idev->dev_cmd_regs->done); iowrite32(1, &idev->dev_cmd_regs->doorbell); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 13dfcf9f7..c10da629e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -150,6 +150,7 @@ struct ionic_dev { bool fw_hb_ready; bool fw_status_ready; u8 fw_generation; + u8 opcode; u64 __iomem *db_pages; dma_addr_t phy_db_pages; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f7634884c..fcc3faecb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3008,6 +3008,9 @@ static void ionic_lif_reset(struct ionic_lif *lif) { struct ionic_dev *idev = &lif->ionic->idev; + if (!ionic_is_fw_running(idev)) + return; + mutex_lock(&lif->ionic->dev_cmd_lock); ionic_dev_cmd_lif_reset(idev, lif->index); ionic_dev_cmd_wait(lif->ionic, DEVCMD_TIMEOUT); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 79272f5f3..d2038ff31 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -410,22 +410,28 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, do_msg); } -int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +static int __ionic_adminq_post_wait(struct ionic_lif *lif, + struct ionic_admin_ctx *ctx, + const bool do_msg) { int err; + if (!ionic_is_fw_running(&lif->ionic->idev)) + return 0; + err = ionic_adminq_post(lif, ctx); - return ionic_adminq_wait(lif, ctx, err, true); + return ionic_adminq_wait(lif, ctx, err, do_msg); } -int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { - int err; - - err = ionic_adminq_post(lif, ctx); + return __ionic_adminq_post_wait(lif, ctx, true); +} - return ionic_adminq_wait(lif, ctx, err, false); +int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + return __ionic_adminq_post_wait(lif, ctx, false); } static void ionic_dev_cmd_clean(struct ionic *ionic) @@ -465,7 +471,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, */ max_wait = jiffies + (max_seconds * HZ); try_again: - opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode); + opcode = idev->opcode; start_time = jiffies; for (fw_up = ionic_is_fw_running(idev); !done && fw_up && time_before(jiffies, max_wait); diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 54bb072ae..c11d62685 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -209,6 +209,7 @@ struct stmmac_safety_stats { unsigned long mac_errors[32]; unsigned long mtl_errors[32]; unsigned long dma_errors[32]; + unsigned long dma_dpp_errors[32]; }; /* Number of fields in Safety Stats */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 880a75bf2..8748c37e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -282,6 +282,8 @@ #define XGMAC_RXCEIE BIT(4) #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc +#define XGMAC_MTL_DPP_CONTROL 0x000010e0 +#define XGMAC_DPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 @@ -364,6 +366,7 @@ #define XGMAC_DCEIE BIT(1) #define XGMAC_TCEIE BIT(0) #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074 #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index c2181c277..ec1616ffb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -789,6 +789,44 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; +#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error" +#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error" + +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { + { true, "TDPES0", DPP_TX_ERR }, + { true, "TDPES1", DPP_TX_ERR }, + { true, "TDPES2", DPP_TX_ERR }, + { true, "TDPES3", DPP_TX_ERR }, + { true, "TDPES4", DPP_TX_ERR }, + { true, "TDPES5", DPP_TX_ERR }, + { true, "TDPES6", DPP_TX_ERR }, + { true, "TDPES7", DPP_TX_ERR }, + { true, "TDPES8", DPP_TX_ERR }, + { true, "TDPES9", DPP_TX_ERR }, + { true, "TDPES10", DPP_TX_ERR }, + { true, "TDPES11", DPP_TX_ERR }, + { true, "TDPES12", DPP_TX_ERR }, + { true, "TDPES13", DPP_TX_ERR }, + { true, "TDPES14", DPP_TX_ERR }, + { true, "TDPES15", DPP_TX_ERR }, + { true, "RDPES0", DPP_RX_ERR }, + { true, "RDPES1", DPP_RX_ERR }, + { true, "RDPES2", DPP_RX_ERR }, + { true, "RDPES3", DPP_RX_ERR }, + { true, "RDPES4", DPP_RX_ERR }, + { true, "RDPES5", DPP_RX_ERR }, + { true, "RDPES6", DPP_RX_ERR }, + { true, "RDPES7", DPP_RX_ERR }, + { true, "RDPES8", DPP_RX_ERR }, + { true, "RDPES9", DPP_RX_ERR }, + { true, "RDPES10", DPP_RX_ERR }, + { true, "RDPES11", DPP_RX_ERR }, + { true, "RDPES12", DPP_RX_ERR }, + { true, "RDPES13", DPP_RX_ERR }, + { true, "RDPES14", DPP_RX_ERR }, + { true, "RDPES15", DPP_RX_ERR }, +}; + static void dwxgmac3_handle_dma_err(struct net_device *ndev, void __iomem *ioaddr, bool correctable, struct stmmac_safety_stats *stats) @@ -800,6 +838,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev, dwxgmac3_log_error(ndev, value, correctable, "DMA", dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); + + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); + + dwxgmac3_log_error(ndev, value, false, "DMA_DPP", + dwxgmac3_dma_dpp_errors, + STAT_OFF(dma_dpp_errors), stats); } static int @@ -838,6 +883,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + /* 5. Enable Data Path Parity Protection */ + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); + /* already enabled by default, explicit enable it again */ + value &= ~XGMAC_DPP_DISABLE; + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); + return 0; } @@ -871,7 +922,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, ret |= !corr; } - err = dma & (XGMAC_DEUIS | XGMAC_DECIS); + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path + * Parity Errors here + */ + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); corr = dma & XGMAC_DECIS; if (err) { dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); @@ -887,6 +942,7 @@ static const struct dwxgmac3_error { { dwxgmac3_mac_errors }, { dwxgmac3_mtl_errors }, { dwxgmac3_dma_errors }, + { dwxgmac3_dma_dpp_errors }, }; static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e988a60c8..e2d51014a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3826,6 +3826,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); @@ -3897,8 +3900,10 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) { set_bit(__FPE_REMOVING, &priv->fpe_task_state); - if (priv->fpe_wq) + if (priv->fpe_wq) { destroy_workqueue(priv->fpe_wq); + priv->fpe_wq = NULL; + } netdev_info(priv->dev, "FPE workqueue stop"); } @@ -5820,11 +5825,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5840,11 +5840,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5866,11 +5861,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5897,11 +5887,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b..d95771ca4 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c..6e70aa1cc 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -772,6 +772,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->slave_num); return; } + + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f393e454f..3f8da6f0b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index bace98959..7086acfed 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1902,26 +1902,26 @@ static int __init gtp_init(void) get_random_bytes(>p_h_initval, sizeof(gtp_h_initval)); - err = rtnl_link_register(>p_link_ops); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = rtnl_link_register(>p_link_ops); if (err < 0) - goto unreg_rtnl_link; + goto unreg_pernet_subsys; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index da737d959..3a834d4e1 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -740,7 +740,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c1aac6ceb..1b7405539 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,6 +42,10 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); @@ -2181,7 +2185,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2204,7 +2208,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. + */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2342,7 +2350,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2382,7 +2390,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2480,10 +2488,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2592,6 +2621,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). + */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2748,28 +2801,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index c3fbdd6b6..f3fa4bd12 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work) trap_report_dw.work); nsim_dev = nsim_trap_data->nsim_dev; - /* For each running port and enabled packet trap, generate a UDP - * packet with a random 5-tuple and report it. - */ if (!devl_trylock(priv_to_devlink(nsim_dev))) { - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1); return; } + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index edd4b1e58..75868e63b 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -2051,7 +2051,7 @@ static struct phy_driver at803x_driver[] = { .write_page = at803x_write_page, .get_features = at803x_get_features, .read_status = at803x_read_status, - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, @@ -2081,7 +2081,7 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, @@ -2097,7 +2097,7 @@ static struct phy_driver at803x_driver[] = { .resume = at803x_resume, .flags = PHY_POLL_CABLE_TEST, /* PHY_BASIC_FEATURES */ - .config_intr = &at803x_config_intr, + .config_intr = at803x_config_intr, .handle_interrupt = at803x_handle_interrupt, .cable_test_start = at803x_cable_test_start, .cable_test_get_status = at803x_cable_test_get_status, diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 3d99fd666..70e52d270 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -414,9 +414,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_resume(struct phy_device *phydev) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 15a179631..abc65c4d7 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -469,6 +469,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, p)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 367255bb4..922d6f16d 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -653,6 +653,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; + ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 5a1bf42ce..d837c1887 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1315,8 +1315,6 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); - ax88179_reset(dev); - return 0; } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 99ec1d4a9..8b6d6a1b3 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -232,7 +232,7 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) err = dm_read_shared_word(dev, 1, loc, &res); if (err < 0) { netdev_err(dev->net, "MDIO read error: %d\n", err); - return err; + return 0; } netdev_dbg(dev->net, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c458c030f..366e83ed0 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1501,7 +1501,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) lan78xx_rx_urb_submit_all(dev); + local_bh_disable(); napi_schedule(&dev->napi); + local_bh_enable(); } return 0; @@ -3035,7 +3037,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - if (dev->chipid == ID_REV_CHIP_ID_7800_) { + if (dev->chipid == ID_REV_CHIP_ID_7800_ || + dev->chipid == ID_REV_CHIP_ID_7850_) { ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); if (!ret && sig != EEPROM_INDICATOR) { /* Implies there is no external eeprom. Set mac speed */ @@ -3134,7 +3137,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 36c5a41f8..dd9f5f146 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1135,14 +1135,6 @@ static int veth_enable_xdp(struct net_device *dev) veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); return err; } - - if (!veth_gro_requested(dev)) { - /* user-space did not require GRO, but adding XDP - * is supposed to get GRO working - */ - dev->features |= NETIF_F_GRO; - netdev_features_change(dev); - } } } @@ -1162,18 +1154,9 @@ static void veth_disable_xdp(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); - if (!netif_running(dev) || !veth_gro_requested(dev)) { + if (!netif_running(dev) || !veth_gro_requested(dev)) veth_napi_del(dev); - /* if user-space did not require GRO, since adding XDP - * enabled it, clear it now - */ - if (!veth_gro_requested(dev) && netif_running(dev)) { - dev->features &= ~NETIF_F_GRO; - netdev_features_change(dev); - } - } - veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); } @@ -1376,7 +1359,8 @@ static int veth_alloc_queues(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); int i; - priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); + priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); if (!priv->rq) return -ENOMEM; @@ -1392,7 +1376,7 @@ static void veth_free_queues(struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); - kfree(priv->rq); + kvfree(priv->rq); } static int veth_dev_init(struct net_device *dev) @@ -1558,6 +1542,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (!old_prog) { + if (!veth_gro_requested(dev)) { + /* user-space did not require GRO, but adding + * XDP is supposed to get GRO working + */ + dev->features |= NETIF_F_GRO; + netdev_features_change(dev); + } + peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } @@ -1568,6 +1560,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->flags & IFF_UP) veth_disable_xdp(dev); + /* if user-space did not require GRO, since adding XDP + * enabled it, clear it now + */ + if (!veth_gro_requested(dev)) { + dev->features &= ~NETIF_F_GRO; + netdev_features_change(dev); + } + if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 21d3461fb..45f1a871b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3474,10 +3474,11 @@ static int virtnet_find_vqs(struct virtnet_info *vi) { vq_callback_t **callbacks; struct virtqueue **vqs; - int ret = -ENOMEM; - int i, total_vqs; const char **names; + int ret = -ENOMEM; + int total_vqs; bool *ctx; + u16 i; /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by @@ -3514,8 +3515,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { callbacks[rxq2vq(i)] = skb_recv_done; callbacks[txq2vq(i)] = skb_xmit_done; - sprintf(vi->rq[i].name, "input.%d", i); - sprintf(vi->sq[i].name, "output.%d", i); + sprintf(vi->rq[i].name, "input.%u", i); + sprintf(vi->sq[i].name, "output.%u", i); names[rxq2vq(i)] = vi->rq[i].name; names[txq2vq(i)] = vi->sq[i].name; if (ctx) diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c index 380f9d37b..e3b65efcc 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -453,8 +453,6 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab) { int i; - set_bit(ATH11K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); - for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; @@ -465,6 +463,8 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab) } ath11k_pcic_ext_grp_enable(irq_grp); } + + set_bit(ATH11K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); } EXPORT_SYMBOL(ath11k_pcic_ext_irq_enable); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 672789e3c..d6a3f001d 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -652,9 +652,10 @@ void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) struct ath9k_htc_tx_event *tx_pend; int i; - for (i = 0; i < txs->cnt; i++) { - WARN_ON(txs->cnt > HTC_MAX_TX_STATUS); + if (WARN_ON_ONCE(txs->cnt > HTC_MAX_TX_STATUS)) + return; + for (i = 0; i < txs->cnt; i++) { __txs = &txs->txstatus[i]; skb = ath9k_htc_tx_get_packet(priv, __txs); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 585e8cd2d..f5fcc547d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -576,7 +576,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -592,7 +592,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -608,7 +608,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -665,7 +665,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -681,7 +681,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -697,7 +697,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -1044,6 +1044,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto read_table; } + ret = PTR_ERR(wifi_pkg); + goto out_free; + read_table: fwrt->ppag_ver = tbl_rev; flags = &wifi_pkg->package.elements[1]; diff --git a/drivers/net/wireless/marvell/mwifiex/Kconfig b/drivers/net/wireless/marvell/mwifiex/Kconfig index 2b4ff2b78..b182f7155 100644 --- a/drivers/net/wireless/marvell/mwifiex/Kconfig +++ b/drivers/net/wireless/marvell/mwifiex/Kconfig @@ -10,13 +10,14 @@ config MWIFIEX mwifiex. config MWIFIEX_SDIO - tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8987/SD8997" + tristate "Marvell WiFi-Ex Driver for SD8786/SD8787/SD8797/SD8887/SD8897/SD8977/SD8978/SD8987/SD8997" depends on MWIFIEX && MMC select FW_LOADER select WANT_DEV_COREDUMP help This adds support for wireless adapters based on Marvell - 8786/8787/8797/8887/8897/8977/8987/8997 chipsets with SDIO interface. + 8786/8787/8797/8887/8897/8977/8978/8987/8997 chipsets with + SDIO interface. SD8978 is also known as NXP IW416. If you choose to build it as a module, it will be called mwifiex_sdio. diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index ea1c1c241..2c9b70e9a 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -263,7 +263,7 @@ static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8887 = { 0x68, 0x69, 0x6a}, }; -static const struct mwifiex_sdio_card_reg mwifiex_reg_sd8987 = { +static const struct mwifiex_sdio_card_reg mwifiex_reg_sd89xx = { .start_rd_port = 0, .start_wr_port = 0, .base_0_reg = 0xF8, @@ -331,6 +331,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8786 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = false, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { @@ -346,6 +347,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8787 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { @@ -361,6 +363,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8797 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { @@ -376,6 +379,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8897 = { .can_dump_fw = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { @@ -392,6 +396,24 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8977 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, +}; + +static const struct mwifiex_sdio_device mwifiex_sdio_sd8978 = { + .firmware_sdiouart = SD8978_SDIOUART_FW_NAME, + .reg = &mwifiex_reg_sd89xx, + .max_ports = 32, + .mp_agg_pkt_limit = 16, + .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K, + .mp_tx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .mp_rx_agg_buf_size = MWIFIEX_MP_AGGR_BUF_SIZE_MAX, + .supports_sdio_new_mode = true, + .has_control_mask = false, + .can_dump_fw = true, + .fw_dump_enh = true, + .can_auto_tdls = false, + .can_ext_scan = true, + .fw_ready_extra_delay = true, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { @@ -409,6 +431,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8997 = { .fw_dump_enh = true, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { @@ -424,11 +447,12 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8887 = { .can_dump_fw = false, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .firmware = SD8987_DEFAULT_FW_NAME, - .reg = &mwifiex_reg_sd8987, + .reg = &mwifiex_reg_sd89xx, .max_ports = 32, .mp_agg_pkt_limit = 16, .tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_2K, @@ -440,6 +464,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8987 = { .fw_dump_enh = true, .can_auto_tdls = true, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { @@ -455,6 +480,7 @@ static const struct mwifiex_sdio_device mwifiex_sdio_sd8801 = { .can_dump_fw = false, .can_auto_tdls = false, .can_ext_scan = true, + .fw_ready_extra_delay = false, }; static struct memory_type_mapping generic_mem_type_map[] = { @@ -482,7 +508,9 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, + { .compatible = "marvell,sd8978" }, { .compatible = "marvell,sd8997" }, + { .compatible = "nxp,iw416" }, { } }; @@ -545,6 +573,7 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id) card->fw_dump_enh = data->fw_dump_enh; card->can_auto_tdls = data->can_auto_tdls; card->can_ext_scan = data->can_ext_scan; + card->fw_ready_extra_delay = data->fw_ready_extra_delay; INIT_WORK(&card->work, mwifiex_sdio_work); } @@ -748,8 +777,9 @@ mwifiex_sdio_read_fw_status(struct mwifiex_adapter *adapter, u16 *dat) static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) { + struct sdio_mmc_card *card = adapter->card; int ret = 0; - u16 firmware_stat; + u16 firmware_stat = 0; u32 tries; for (tries = 0; tries < poll_num; tries++) { @@ -765,6 +795,13 @@ static int mwifiex_check_fw_status(struct mwifiex_adapter *adapter, ret = -1; } + if (card->fw_ready_extra_delay && + firmware_stat == FIRMWARE_READY_SDIO) + /* firmware might pretend to be ready, when it's not. + * Wait a little bit more as a workaround. + */ + msleep(100); + return ret; } @@ -920,6 +957,8 @@ static const struct sdio_device_id mwifiex_ids[] = { .driver_data = (unsigned long)&mwifiex_sdio_sd8801}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8977_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8977}, + {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8978_WLAN), + .driver_data = (unsigned long)&mwifiex_sdio_sd8978}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8987_WLAN), .driver_data = (unsigned long)&mwifiex_sdio_sd8987}, {SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8997_WLAN), @@ -3164,6 +3203,7 @@ MODULE_FIRMWARE(SD8797_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8897_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8887_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8977_DEFAULT_FW_NAME); +MODULE_FIRMWARE(SD8978_SDIOUART_FW_NAME); MODULE_FIRMWARE(SD8987_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_DEFAULT_FW_NAME); MODULE_FIRMWARE(SD8997_SDIOUART_FW_NAME); diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.h b/drivers/net/wireless/marvell/mwifiex/sdio.h index 3a24bb48b..a5112cb35 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.h +++ b/drivers/net/wireless/marvell/mwifiex/sdio.h @@ -25,6 +25,7 @@ #define SD8887_DEFAULT_FW_NAME "mrvl/sd8887_uapsta.bin" #define SD8801_DEFAULT_FW_NAME "mrvl/sd8801_uapsta.bin" #define SD8977_DEFAULT_FW_NAME "mrvl/sdsd8977_combo_v2.bin" +#define SD8978_SDIOUART_FW_NAME "mrvl/sdiouartiw416_combo_v0.bin" #define SD8987_DEFAULT_FW_NAME "mrvl/sd8987_uapsta.bin" #define SD8997_DEFAULT_FW_NAME "mrvl/sdsd8997_combo_v4.bin" #define SD8997_SDIOUART_FW_NAME "mrvl/sdiouart8997_combo_v4.bin" @@ -257,6 +258,7 @@ struct sdio_mmc_card { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; struct mwifiex_sdio_mpa_tx mpa_tx; struct mwifiex_sdio_mpa_rx mpa_rx; @@ -280,6 +282,7 @@ struct mwifiex_sdio_device { bool fw_dump_enh; bool can_auto_tdls; bool can_ext_scan; + bool fw_ready_extra_delay; }; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 12b700c7b..517d9023a 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -8672,7 +8672,7 @@ static void rt2800_rxdcoc_calibration(struct rt2x00_dev *rt2x00dev) rt2800_rfcsr_write_bank(rt2x00dev, 5, 4, saverfb5r4); rt2800_rfcsr_write_bank(rt2x00dev, 7, 4, saverfb7r4); - rt2800_bbp_write(rt2x00dev, 158, 141); + rt2800_bbp_write(rt2x00dev, 158, 140); bbpreg = rt2800_bbp_read(rt2x00dev, 159); bbpreg = bbpreg & (~0x40); rt2800_bbp_write(rt2x00dev, 159, bbpreg); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 9a9cfd0ce..00b945053 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -101,6 +101,7 @@ void rt2x00lib_disable_radio(struct rt2x00_dev *rt2x00dev) rt2x00link_stop_tuner(rt2x00dev); rt2x00queue_stop_queues(rt2x00dev); rt2x00queue_flush_queues(rt2x00dev, true); + rt2x00queue_stop_queue(rt2x00dev->bcn); /* * Disable radio. @@ -1286,6 +1287,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; /* Enable the radio */ retval = rt2x00lib_enable_radio(rt2x00dev); @@ -1312,6 +1314,7 @@ void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; } static inline void rt2x00lib_set_if_combinations(struct rt2x00_dev *rt2x00dev) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 4202c6517..75fda72c1 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -598,6 +598,17 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, */ if (changes & BSS_CHANGED_BEACON_ENABLED) { mutex_lock(&intf->beacon_skb_mutex); + + /* + * Clear the 'enable_beacon' flag and clear beacon because + * the beacon queue has been stopped after hardware reset. + */ + if (test_bit(DEVICE_STATE_RESET, &rt2x00dev->flags) && + intf->enable_beacon) { + intf->enable_beacon = false; + rt2x00queue_clear_beacon(rt2x00dev, vif); + } + if (!bss_conf->enable_beacon && intf->enable_beacon) { rt2x00dev->intf_beaconing--; intf->enable_beacon = false; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 3a9fa3ff3..6dd5ec1e4 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -7030,6 +7030,18 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192eu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818c, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192eu_fops}, +/* D-Link DWA-131 rev C1 */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3312, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* TP-Link TL-WN8200ND V2 */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0126, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Mercusys MW300UM */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2c4e, 0x0100, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Mercusys MW300UH */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2c4e, 0x0104, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, #endif { } }; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c index fe9b407dc..71e29b103 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c @@ -49,7 +49,7 @@ u32 rtl8723e_phy_query_rf_reg(struct ieee80211_hw *hw, rfpath, regaddr); } - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); readback_value = (original_value & bitmask) >> bitshift; spin_unlock(&rtlpriv->locks.rf_lock); @@ -80,7 +80,7 @@ void rtl8723e_phy_set_rf_reg(struct ieee80211_hw *hw, original_value = rtl8723_phy_rf_serial_read(hw, rfpath, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); @@ -89,7 +89,7 @@ void rtl8723e_phy_set_rf_reg(struct ieee80211_hw *hw, rtl8723_phy_rf_serial_write(hw, rfpath, regaddr, data); } else { if (bitmask != RFREG_OFFSET_MASK) { - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 2b9313cb9..094cb3615 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -41,7 +41,7 @@ u32 rtl8723be_phy_query_rf_reg(struct ieee80211_hw *hw, enum radio_path rfpath, spin_lock(&rtlpriv->locks.rf_lock); original_value = rtl8723_phy_rf_serial_read(hw, rfpath, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); readback_value = (original_value & bitmask) >> bitshift; spin_unlock(&rtlpriv->locks.rf_lock); @@ -68,7 +68,7 @@ void rtl8723be_phy_set_rf_reg(struct ieee80211_hw *hw, enum radio_path path, if (bitmask != RFREG_OFFSET_MASK) { original_value = rtl8723_phy_rf_serial_read(hw, path, regaddr); - bitshift = rtl8723_phy_calculate_bit_shift(bitmask); + bitshift = calculate_bit_shift(bitmask); data = ((original_value & (~bitmask)) | (data << bitshift)); } diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index 626dfb4b7..073e870b2 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -354,29 +354,38 @@ static int wfx_upload_ap_templates(struct wfx_vif *wvif) return 0; } -static void wfx_set_mfp_ap(struct wfx_vif *wvif) +static int wfx_set_mfp_ap(struct wfx_vif *wvif) { struct ieee80211_vif *vif = wvif_to_vif(wvif); struct sk_buff *skb = ieee80211_beacon_get(wvif->wdev->hw, vif, 0); const int ieoffset = offsetof(struct ieee80211_mgmt, u.beacon.variable); - const u16 *ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, - skb->len - ieoffset); const int pairwise_cipher_suite_count_offset = 8 / sizeof(u16); const int pairwise_cipher_suite_size = 4 / sizeof(u16); const int akm_suite_size = 4 / sizeof(u16); + const u16 *ptr; - if (ptr) { - ptr += pairwise_cipher_suite_count_offset; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - ptr += 1 + pairwise_cipher_suite_size * *ptr; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - ptr += 1 + akm_suite_size * *ptr; - if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) - return; - wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); - } + if (unlikely(!skb)) + return -ENOMEM; + + ptr = (u16 *)cfg80211_find_ie(WLAN_EID_RSN, skb->data + ieoffset, + skb->len - ieoffset); + if (unlikely(!ptr)) + return -EINVAL; + + ptr += pairwise_cipher_suite_count_offset; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + return -EINVAL; + + ptr += 1 + pairwise_cipher_suite_size * *ptr; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + return -EINVAL; + + ptr += 1 + akm_suite_size * *ptr; + if (WARN_ON(ptr > (u16 *)skb_tail_pointer(skb))) + return -EINVAL; + + wfx_hif_set_mfp(wvif, *ptr & BIT(7), *ptr & BIT(6)); + return 0; } int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -394,8 +403,7 @@ int wfx_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, ret = wfx_hif_start(wvif, &vif->bss_conf, wvif->channel); if (ret > 0) return -EIO; - wfx_set_mfp_ap(wvif); - return ret; + return wfx_set_mfp_ap(wvif); } void wfx_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 271604098..0d51c900c 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -997,7 +981,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1009,7 +992,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1444,8 +1426,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1455,8 +1464,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1470,32 +1479,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 177a365b8..3dbf926fd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3869,10 +3862,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3890,7 +3879,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3914,8 +3903,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3935,45 +3922,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fd..8a02ed63b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -165,7 +167,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -248,6 +250,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,7 +381,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -801,14 +811,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -830,15 +837,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -851,12 +856,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -968,7 +969,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1077,8 +1078,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; @@ -1093,13 +1092,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1111,6 +1125,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -1130,7 +1147,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1171,13 +1188,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1207,7 +1229,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1216,29 +1238,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1414,6 +1428,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1491,9 +1506,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1546,9 +1560,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1580,7 +1593,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1592,9 +1605,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1624,6 +1636,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1869,9 +1883,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1886,8 +1897,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1903,6 +1912,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2539,8 +2551,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2901,6 +2914,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2932,6 +2948,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c780af36c..f5b8442b6 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ce42afe8f..348076827 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1884,6 +1884,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 4402871b5..e663d5585 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; diff --git a/drivers/of/property.c b/drivers/of/property.c index b636777e6..da5d71219 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -762,7 +762,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node) /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); - if (depth == 2 && !of_node_name_eq(node, "ports")) + if (depth == 2 && !of_node_name_eq(node, "ports") && + !of_node_name_eq(node, "in-ports") && + !of_node_name_eq(node, "out-ports")) break; } return node; @@ -1243,7 +1245,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") -DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") +DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") @@ -1261,7 +1263,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) -DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) @@ -1326,6 +1327,17 @@ static struct device_node *parse_interrupts(struct device_node *np, return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } +static struct device_node *parse_remote_endpoint(struct device_node *np, + const char *prop_name, + int index) +{ + /* Return NULL for index > 0 to signify end of remote-endpoints. */ + if (index > 0 || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +} + static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index e541a8960..ce1386074 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -49,6 +49,12 @@ static struct unittest_results { failed; \ }) +#ifdef CONFIG_OF_KOBJ +#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref) +#else +#define OF_KREF_READ(NODE) 1 +#endif + /* * Expected message may have a message level other than KERN_INFO. * Print the expected message only if the current loglevel will allow @@ -562,7 +568,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void) pr_err("missing testcase data\n"); return; } - prefs[i] = kref_read(&p[i]->kobj.kref); + prefs[i] = OF_KREF_READ(p[i]); } rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells"); @@ -685,9 +691,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void) unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); for (i = 0; i < ARRAY_SIZE(p); ++i) { - unittest(prefs[i] == kref_read(&p[i]->kobj.kref), + unittest(prefs[i] == OF_KREF_READ(p[i]), "provider%d: expected:%d got:%d\n", - i, prefs[i], kref_read(&p[i]->kobj.kref)); + i, prefs[i], OF_KREF_READ(p[i])); of_node_put(p[i]); } } diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c index ad99707b3..dd7d74fec 100644 --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c @@ -18,6 +18,20 @@ #include "pcie-designware.h" +#define PEX_PF0_CONFIG 0xC0014 +#define PEX_PF0_CFG_READY BIT(0) + +/* PEX PFa PCIE PME and message interrupt registers*/ +#define PEX_PF0_PME_MES_DR 0xC0020 +#define PEX_PF0_PME_MES_DR_LUD BIT(7) +#define PEX_PF0_PME_MES_DR_LDD BIT(9) +#define PEX_PF0_PME_MES_DR_HRD BIT(10) + +#define PEX_PF0_PME_MES_IER 0xC0028 +#define PEX_PF0_PME_MES_IER_LUDIE BIT(7) +#define PEX_PF0_PME_MES_IER_LDDIE BIT(9) +#define PEX_PF0_PME_MES_IER_HRDIE BIT(10) + #define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) struct ls_pcie_ep_drvdata { @@ -30,8 +44,99 @@ struct ls_pcie_ep { struct dw_pcie *pci; struct pci_epc_features *ls_epc; const struct ls_pcie_ep_drvdata *drvdata; + int irq; + u32 lnkcap; + bool big_endian; }; +static u32 ls_lut_readl(struct ls_pcie_ep *pcie, u32 offset) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + return ioread32be(pci->dbi_base + offset); + else + return ioread32(pci->dbi_base + offset); +} + +static void ls_lut_writel(struct ls_pcie_ep *pcie, u32 offset, u32 value) +{ + struct dw_pcie *pci = pcie->pci; + + if (pcie->big_endian) + iowrite32be(value, pci->dbi_base + offset); + else + iowrite32(value, pci->dbi_base + offset); +} + +static irqreturn_t ls_pcie_ep_event_handler(int irq, void *dev_id) +{ + struct ls_pcie_ep *pcie = dev_id; + struct dw_pcie *pci = pcie->pci; + u32 val, cfg; + u8 offset; + + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_DR); + ls_lut_writel(pcie, PEX_PF0_PME_MES_DR, val); + + if (!val) + return IRQ_NONE; + + if (val & PEX_PF0_PME_MES_DR_LUD) { + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + + /* + * The values of the Maximum Link Width and Supported Link + * Speed from the Link Capabilities Register will be lost + * during link down or hot reset. Restore initial value + * that configured by the Reset Configuration Word (RCW). + */ + dw_pcie_dbi_ro_wr_en(pci); + dw_pcie_writel_dbi(pci, offset + PCI_EXP_LNKCAP, pcie->lnkcap); + dw_pcie_dbi_ro_wr_dis(pci); + + cfg = ls_lut_readl(pcie, PEX_PF0_CONFIG); + cfg |= PEX_PF0_CFG_READY; + ls_lut_writel(pcie, PEX_PF0_CONFIG, cfg); + dw_pcie_ep_linkup(&pci->ep); + + dev_dbg(pci->dev, "Link up\n"); + } else if (val & PEX_PF0_PME_MES_DR_LDD) { + dev_dbg(pci->dev, "Link down\n"); + } else if (val & PEX_PF0_PME_MES_DR_HRD) { + dev_dbg(pci->dev, "Hot reset\n"); + } + + return IRQ_HANDLED; +} + +static int ls_pcie_ep_interrupt_init(struct ls_pcie_ep *pcie, + struct platform_device *pdev) +{ + u32 val; + int ret; + + pcie->irq = platform_get_irq_byname(pdev, "pme"); + if (pcie->irq < 0) + return pcie->irq; + + ret = devm_request_irq(&pdev->dev, pcie->irq, ls_pcie_ep_event_handler, + IRQF_SHARED, pdev->name, pcie); + if (ret) { + dev_err(&pdev->dev, "Can't register PCIe IRQ\n"); + return ret; + } + + /* Enable interrupts */ + val = ls_lut_readl(pcie, PEX_PF0_PME_MES_IER); + val |= PEX_PF0_PME_MES_IER_LDDIE | PEX_PF0_PME_MES_IER_HRDIE | + PEX_PF0_PME_MES_IER_LUDIE; + ls_lut_writel(pcie, PEX_PF0_PME_MES_IER, val); + + return 0; +} + static const struct pci_epc_features* ls_pcie_ep_get_features(struct dw_pcie_ep *ep) { @@ -124,6 +229,8 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) struct ls_pcie_ep *pcie; struct pci_epc_features *ls_epc; struct resource *dbi_base; + u8 offset; + int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -143,6 +250,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ops = pcie->drvdata->dw_pcie_ops; ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4); + ls_epc->linkup_notifier = true; pcie->pci = pci; pcie->ls_epc = ls_epc; @@ -154,9 +262,18 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev) pci->ep.ops = &ls_pcie_ep_ops; + pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian"); + platform_set_drvdata(pdev, pcie); - return dw_pcie_ep_init(&pci->ep); + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + pcie->lnkcap = dw_pcie_readl_dbi(pci, offset + PCI_EXP_LNKCAP); + + ret = dw_pcie_ep_init(&pci->ep); + if (ret) + return ret; + + return ls_pcie_ep_interrupt_init(pcie, pdev); } static struct platform_driver ls_pcie_ep_driver = { diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 59c164b5c..4086a7818 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I <kishon@ti.com> */ +#include <linux/align.h> #include <linux/of.h> #include <linux/platform_device.h> @@ -600,7 +601,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index e9cf318e6..34877a1f4 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -60,7 +60,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ed6d75d13..e1d02b7c6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -274,7 +274,7 @@ void pci_bus_put(struct pci_bus *bus); /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ - ((speed) == PCIE_SPEED_64_0GT ? 64000*128/130 : \ + ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \ (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e2d8a74f8..5426f450c 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -748,7 +748,7 @@ static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) u8 bus = info->id >> 8; u8 devfn = info->id & 0xff; - pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n", + pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n", info->multi_error_valid ? "Multiple " : "", aer_error_severity_string[info->severity], pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), @@ -936,7 +936,12 @@ static bool find_source_device(struct pci_dev *parent, pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { - pci_info(parent, "can't find device of ID%04x\n", e_info->id); + u8 bus = e_info->id >> 8; + u8 devfn = e_info->id & 0xff; + + pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n", + pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn), + PCI_FUNC(devfn)); return false; } return true; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8765544ba..51d634fbd 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -607,10 +607,13 @@ static void quirk_amd_dwc_class(struct pci_dev *pdev) { u32 class = pdev->class; - /* Use "USB Device (not host controller)" class */ - pdev->class = PCI_CLASS_SERIAL_USB_DEVICE; - pci_info(pdev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n", - class, pdev->class); + if (class != PCI_CLASS_SERIAL_USB_DEVICE) { + /* Use "USB Device (not host controller)" class */ + pdev->class = PCI_CLASS_SERIAL_USB_DEVICE; + pci_info(pdev, + "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n", + class, pdev->class); + } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB, quirk_amd_dwc_class); @@ -3692,6 +3695,19 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset); /* + * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset + * (i.e., they advertise NoSoftRst-). However, this transition does not have + * any effect on the device: It continues to be operational and network ports + * remain up. Advertising this support makes it seem as if a PM reset is viable + * for these devices. Mark it as unavailable to skip it when testing reset + * methods. + */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset); + +/* * Thunderbolt controllers with broken MSI hotplug signaling: * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part * of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge). diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 0c1faa6c1..3f3320d0a 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1308,13 +1308,6 @@ static void stdev_release(struct device *dev) { struct switchtec_dev *stdev = to_stdev(dev); - if (stdev->dma_mrpc) { - iowrite32(0, &stdev->mmio_mrpc->dma_en); - flush_wc_buf(stdev); - writeq(0, &stdev->mmio_mrpc->dma_addr); - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); - } kfree(stdev); } @@ -1358,7 +1351,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return ERR_PTR(-ENOMEM); stdev->alive = true; - stdev->pdev = pdev; + stdev->pdev = pci_dev_get(pdev); INIT_LIST_HEAD(&stdev->mrpc_queue); mutex_init(&stdev->mrpc_mutex); stdev->mrpc_busy = 0; @@ -1391,6 +1384,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return stdev; err_put: + pci_dev_put(stdev->pdev); put_device(&stdev->dev); return ERR_PTR(rc); } @@ -1646,6 +1640,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, return 0; } +static void switchtec_exit_pci(struct switchtec_dev *stdev) +{ + if (stdev->dma_mrpc) { + iowrite32(0, &stdev->mmio_mrpc->dma_en); + flush_wc_buf(stdev); + writeq(0, &stdev->mmio_mrpc->dma_addr); + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); + stdev->dma_mrpc = NULL; + } +} + static int switchtec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1705,6 +1711,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); stdev_kill(stdev); + switchtec_exit_pci(stdev); + pci_dev_put(stdev->pdev); + stdev->pdev = NULL; put_device(&stdev->dev); } diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b3288..0928a526e 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 9de617ca9..7e61c6b27 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -675,8 +675,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { - int ret; - channel->is_otg_channel = true; channel->uses_otg_pins = !of_property_read_bool(dev->of_node, "renesas,no-otg-pins"); @@ -740,8 +738,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) ret = PTR_ERR(provider); goto error; } else if (channel->is_otg_channel) { - int ret; - ret = device_create_file(dev, &dev_attr_role); if (ret < 0) goto error; diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 31a775877..63c458099 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -116,7 +116,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->set_vbus) return -ENODEV; return phy->comparator->set_vbus(phy->comparator, enabled); @@ -126,7 +126,7 @@ static int omap_usb_start_srp(struct usb_otg *otg) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->start_srp) return -ENODEV; return phy->comparator->start_srp(phy->comparator); diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 8e2b07ed2..c10c99a31 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 6edd2e294..c2fb19af1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10511,6 +10511,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10696,8 +10697,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 9a92d515a..11d72a353 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -50,7 +50,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -913,6 +913,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1568,6 +1594,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, + { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, .matches = { @@ -1741,7 +1776,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 4f05f6103..c02ce0834 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -151,13 +151,13 @@ static int vendor_resource_matches(struct pnp_dev *dev, static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, struct acpi_resource_vendor_typed *vendor) { - if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) { - u64 start, length; + struct { u64 start, length; } range; - memcpy(&start, vendor->byte_data, sizeof(start)); - memcpy(&length, vendor->byte_data + 8, sizeof(length)); - - pnp_add_mem_resource(dev, start, start + length - 1, 0); + if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, + sizeof(range))) { + memcpy(&range, vendor->byte_data, sizeof(range)); + pnp_add_mem_resource(dev, range.start, range.start + + range.length - 1, 0); } } diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 0713a52a2..17b37354e 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 34d3d8281..c8702011b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2925,7 +2925,8 @@ static int _regulator_enable(struct regulator *regulator) /* Fallthrough on positive return values - already enabled */ } - rdev->use_count++; + if (regulator->enable_count == 1) + rdev->use_count++; return 0; @@ -3000,37 +3001,40 @@ static int _regulator_disable(struct regulator *regulator) lockdep_assert_held_once(&rdev->mutex.base); - if (WARN(rdev->use_count <= 0, + if (WARN(regulator->enable_count == 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) return -EIO; - /* are we the last user and permitted to disable ? */ - if (rdev->use_count == 1 && - (rdev->constraints && !rdev->constraints->always_on)) { - - /* we are last user */ - if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { - ret = _notifier_call_chain(rdev, - REGULATOR_EVENT_PRE_DISABLE, - NULL); - if (ret & NOTIFY_STOP_MASK) - return -EINVAL; - - ret = _regulator_do_disable(rdev); - if (ret < 0) { - rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); - _notifier_call_chain(rdev, - REGULATOR_EVENT_ABORT_DISABLE, + if (regulator->enable_count == 1) { + /* disabling last enable_count from this regulator */ + /* are we the last user and permitted to disable ? */ + if (rdev->use_count == 1 && + (rdev->constraints && !rdev->constraints->always_on)) { + + /* we are last user */ + if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { + ret = _notifier_call_chain(rdev, + REGULATOR_EVENT_PRE_DISABLE, + NULL); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = _regulator_do_disable(rdev); + if (ret < 0) { + rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); + _notifier_call_chain(rdev, + REGULATOR_EVENT_ABORT_DISABLE, + NULL); + return ret; + } + _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, NULL); - return ret; } - _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, - NULL); - } - rdev->use_count = 0; - } else if (rdev->use_count > 1) { - rdev->use_count--; + rdev->use_count = 0; + } else if (rdev->use_count > 1) { + rdev->use_count--; + } } if (ret == 0) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index b9eeaff1c..925e486f7 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index 115345e9f..82af27b0e 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -734,9 +734,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dad..a5dba3829 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0659aa286..86a8bd532 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1742,6 +1742,7 @@ static ssize_t status_show(struct device *dev, { ssize_t nchars = 0; struct vfio_ap_queue *q; + unsigned long apid, apqi; struct ap_matrix_mdev *matrix_mdev; struct ap_device *apdev = to_ap_dev(dev); @@ -1749,8 +1750,21 @@ static ssize_t status_show(struct device *dev, q = dev_get_drvdata(&apdev->device); matrix_mdev = vfio_ap_mdev_for_queue(q); + /* If the queue is assigned to the matrix mediated device, then + * determine whether it is passed through to a guest; otherwise, + * indicate that it is unassigned. + */ if (matrix_mdev) { - if (matrix_mdev->kvm) + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); + /* + * If the queue is passed through to the guest, then indicate + * that it is in use; otherwise, indicate that it is + * merely assigned to a matrix mediated device. + */ + if (matrix_mdev->kvm && + test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && + test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) nchars = scnprintf(buf, PAGE_SIZE, "%s\n", AP_QUEUE_IN_USE); else diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c0f30cefe..a41601139 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -254,9 +254,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) if (!recover) { hash_del(&addr->hnode); kfree(addr); - continue; + } else { + /* prepare for recovery */ + addr->disp_flag = QETH_DISP_ADDR_ADD; } - addr->disp_flag = QETH_DISP_ADDR_ADD; } mutex_unlock(&card->ip_lock); @@ -277,9 +278,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (addr->disp_flag == QETH_DISP_ADDR_ADD) { rc = qeth_l3_register_addr_entry(card, addr); - if (!rc) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { + /* keep it in the records */ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { + /* bad address */ hash_del(&addr->hnode); kfree(addr); } diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 03e71e3d5..3b990cf2c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1285,7 +1285,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h index 07df255c4..b513d4d9c 100644 --- a/drivers/scsi/arcmsr/arcmsr.h +++ b/drivers/scsi/arcmsr/arcmsr.h @@ -77,9 +77,13 @@ struct device_attribute; #ifndef PCI_DEVICE_ID_ARECA_1203 #define PCI_DEVICE_ID_ARECA_1203 0x1203 #endif +#ifndef PCI_DEVICE_ID_ARECA_1883 +#define PCI_DEVICE_ID_ARECA_1883 0x1883 +#endif #ifndef PCI_DEVICE_ID_ARECA_1884 #define PCI_DEVICE_ID_ARECA_1884 0x1884 #endif +#define PCI_DEVICE_ID_ARECA_1886_0 0x1886 #define PCI_DEVICE_ID_ARECA_1886 0x188A #define ARCMSR_HOURS (1000 * 60 * 60 * 4) #define ARCMSR_MINUTES (1000 * 60 * 60) diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index d3fb8a9c1..fc9d40058 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -214,8 +214,12 @@ static struct pci_device_id arcmsr_device_id_table[] = { .driver_data = ACB_ADAPTER_TYPE_A}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1880), .driver_data = ACB_ADAPTER_TYPE_C}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1883), + .driver_data = ACB_ADAPTER_TYPE_C}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1884), .driver_data = ACB_ADAPTER_TYPE_E}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1886_0), + .driver_data = ACB_ADAPTER_TYPE_F}, {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1886), .driver_data = ACB_ADAPTER_TYPE_F}, {0, 0}, /* Terminating entry */ @@ -4708,9 +4712,11 @@ static const char *arcmsr_info(struct Scsi_Host *host) case PCI_DEVICE_ID_ARECA_1680: case PCI_DEVICE_ID_ARECA_1681: case PCI_DEVICE_ID_ARECA_1880: + case PCI_DEVICE_ID_ARECA_1883: case PCI_DEVICE_ID_ARECA_1884: type = "SAS/SATA"; break; + case PCI_DEVICE_ID_ARECA_1886_0: case PCI_DEVICE_ID_ARECA_1886: type = "NVMe/SAS/SATA"; break; diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 8a4124e7d..ddc048069 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 0c80ff9af..7ae56a2fe 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1565,6 +1565,11 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) } phy->port_id = port_id; + spin_lock(&phy->lock); + /* Delete timer and set phy_attached atomically */ + del_timer(&phy->timer); + phy->phy_attached = 1; + spin_unlock(&phy->lock); /* * Call pm_runtime_get_noresume() which pairs with @@ -1578,11 +1583,6 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) res = IRQ_HANDLED; - spin_lock(&phy->lock); - /* Delete timer and set phy_attached atomically */ - del_timer(&phy->timer); - phy->phy_attached = 1; - spin_unlock(&phy->lock); end: if (phy->reset_completion) complete(phy->reset_completion); diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 6370cdbfb..0f0732d56 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3390,7 +3390,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 945adca5e..05be0810b 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -265,6 +265,11 @@ static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp) if (!fsp->seq_ptr) return -EINVAL; + if (fsp->state & FC_SRB_ABORT_PENDING) { + FC_FCP_DBG(fsp, "abort already pending\n"); + return -EBUSY; + } + this_cpu_inc(fsp->lp->stats->FcpPktAborts); fsp->state |= FC_SRB_ABORT_PENDING; @@ -1671,7 +1676,7 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp) if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY) fc_fcp_rec(fsp); else - fc_fcp_recovery(fsp, FC_ERROR); + fc_fcp_recovery(fsp, FC_TIMED_OUT); break; } fc_fcp_unlock_pkt(fsp); @@ -1690,11 +1695,12 @@ static void fc_fcp_recovery(struct fc_fcp_pkt *fsp, u8 code) fsp->status_code = code; fsp->cdb_status = 0; fsp->io_status = 0; - /* - * if this fails then we let the scsi command timer fire and - * scsi-ml escalate. - */ - fc_fcp_send_abort(fsp); + if (!fsp->cmd) + /* + * Only abort non-scsi commands; otherwise let the + * scsi command timer fire and scsi-ml escalate. + */ + fc_fcp_send_abort(fsp); } /** diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 664ac3069..dc5ac3cc7 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -33,6 +33,7 @@ struct lpfc_sli2_slim; #define ELX_MODEL_NAME_SIZE 80 +#define ELX_FW_NAME_SIZE 84 #define LPFC_PCI_DEV_LP 0x1 #define LPFC_PCI_DEV_OC 0x2 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f59de6180..1a0bafde3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -14778,7 +14778,7 @@ out: int lpfc_sli4_request_firmware_update(struct lpfc_hba *phba, uint8_t fw_upgrade) { - uint8_t file_name[ELX_MODEL_NAME_SIZE]; + char file_name[ELX_FW_NAME_SIZE] = {0}; int ret; const struct firmware *fw; @@ -14787,7 +14787,7 @@ lpfc_sli4_request_firmware_update(struct lpfc_hba *phba, uint8_t fw_upgrade) LPFC_SLI_INTF_IF_TYPE_2) return -EPERM; - snprintf(file_name, ELX_MODEL_NAME_SIZE, "%s.grp", phba->ModelName); + scnprintf(file_name, sizeof(file_name), "%s.grp", phba->ModelName); if (fw_upgrade == INT_FW_UPGRADE) { ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 7aac9fc71..0bb7e164b 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1919,7 +1919,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1927,8 +1927,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2100,7 +2100,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2124,8 +2124,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index d2c7de804..41636c4c4 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1886,7 +1886,8 @@ static int mpi3mr_create_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx) reply_qid = qidx + 1; op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD; - if (!mrioc->pdev->revision) + if ((mrioc->pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) && + !mrioc->pdev->revision) op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD4K; op_reply_q->ci = 0; op_reply_q->ephase = 1; diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 85f5b349c..7a6b006e7 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4963,7 +4963,10 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) mpi3mr_init_drv_cmd(&mrioc->evtack_cmds[i], MPI3MR_HOSTTAG_EVTACKCMD_MIN + i); - if (pdev->revision) + if ((pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) && + !pdev->revision) + mrioc->enable_segqueue = false; + else mrioc->enable_segqueue = true; init_waitqueue_head(&mrioc->reset_waitq); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3cda5d26b..e70ab8db3 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } + /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 9a289d6f2..cfa6f0edf 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -277,11 +277,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) { struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); struct Scsi_Host *shost = scmd->device->host; + unsigned int busy = scsi_host_busy(shost); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d25e1c247..edd296f95 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -185,39 +185,37 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) __scsi_queue_insert(cmd, reason, true); } - /** - * __scsi_execute - insert request and wait for the result - * @sdev: scsi device + * scsi_execute_cmd - insert request and wait for the result + * @sdev: scsi_device * @cmd: scsi command - * @data_direction: data direction + * @opf: block layer request cmd_flags * @buffer: data buffer * @bufflen: len of buffer - * @sense: optional sense buffer - * @sshdr: optional decoded sense header * @timeout: request timeout in HZ * @retries: number of times to retry request - * @flags: flags for ->cmd_flags - * @rq_flags: flags for ->rq_flags - * @resid: optional residual length + * @args: Optional args. See struct definition for field descriptions * * Returns the scsi_cmnd result field if a command was executed, or a negative * Linux error code if we didn't get that far. */ -int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid) +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args) { + static const struct scsi_exec_args default_args; struct request *req; struct scsi_cmnd *scmd; int ret; - req = scsi_alloc_request(sdev->request_queue, - data_direction == DMA_TO_DEVICE ? - REQ_OP_DRV_OUT : REQ_OP_DRV_IN, - rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); + if (!args) + args = &default_args; + else if (WARN_ON_ONCE(args->sense && + args->sense_len != SCSI_SENSE_BUFFERSIZE)) + return -EINVAL; + + req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -232,8 +230,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(scmd->cmnd, cmd, scmd->cmd_len); scmd->allowed = retries; req->timeout = timeout; - req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET; + req->rq_flags |= RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work @@ -249,20 +246,21 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen)) memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len); - if (resid) - *resid = scmd->resid_len; - if (sense && scmd->sense_len) - memcpy(sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - if (sshdr) + if (args->resid) + *args->resid = scmd->resid_len; + if (args->sense) + memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); + if (args->sshdr) scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len, - sshdr); + args->sshdr); + ret = scmd->result; out: blk_mq_free_request(req); return ret; } -EXPORT_SYMBOL(__scsi_execute); +EXPORT_SYMBOL(scsi_execute_cmd); /* * Wake up the error handler if necessary. Avoid as follows that the error @@ -280,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { + unsigned int busy = scsi_host_busy(shost); + spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index b14545acb..9b5fb30e6 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -86,7 +86,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 31b5273f4..4433b02c8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 47d487729..e44f6bb25 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6449,8 +6449,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c2d981d5a..4fad9d85b 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -326,6 +326,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -683,8 +684,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1964,7 +1965,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2157,7 +2158,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2191,8 +2192,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index 474b272f9..832adb570 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -499,6 +499,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren goto err_put_node; } + /* recursive call to add all subdomains */ + ret = scpsys_add_subdomain(scpsys, child); + if (ret) + goto err_put_node; + ret = pm_genpd_add_subdomain(parent_pd, child_pd); if (ret) { dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n", @@ -508,11 +513,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name, child_pd->name); } - - /* recursive call to add all subdomains */ - ret = scpsys_add_subdomain(scpsys, child); - if (ret) - goto err_put_node; } return 0; @@ -526,9 +526,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) { int ret; - if (scpsys_domain_is_on(pd)) - scpsys_power_off(&pd->genpd); - /* * We're in the error cleanup already, so we only complain, * but won't emit another error on top of the original one. @@ -538,6 +535,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) dev_err(pd->scpsys->dev, "failed to remove domain '%s' : %d - state may be inconsistent\n", pd->genpd.name, ret); + if (scpsys_domain_is_on(pd)) + scpsys_power_off(&pd->genpd); clk_bulk_put(pd->num_clks, pd->clks); clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks); diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 092f6ab09..9a90f241b 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -492,12 +492,15 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); diff --git a/drivers/soc/renesas/r8a77980-sysc.c b/drivers/soc/renesas/r8a77980-sysc.c index 39ca84a67..621e411fc 100644 --- a/drivers/soc/renesas/r8a77980-sysc.c +++ b/drivers/soc/renesas/r8a77980-sysc.c @@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = { PD_CPU_NOCR }, { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, PD_CPU_NOCR }, - { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, + { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, + PD_CPU_NOCR }, { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, diff --git a/drivers/soc/xilinx/xlnx_event_manager.c b/drivers/soc/xilinx/xlnx_event_manager.c index f9d9b82b5..8293cc400 100644 --- a/drivers/soc/xilinx/xlnx_event_manager.c +++ b/drivers/soc/xilinx/xlnx_event_manager.c @@ -477,7 +477,7 @@ static void xlnx_call_notify_cb_handler(const u32 *payload) } } if (!is_callback_found) - pr_warn("Didn't find any registered callback for 0x%x 0x%x\n", + pr_warn("Unhandled SGI node 0x%x event 0x%x. Expected with Xen hypervisor\n", payload[1], payload[2]); } @@ -555,7 +555,7 @@ static void xlnx_disable_percpu_irq(void *data) static int xlnx_event_init_sgi(struct platform_device *pdev) { int ret = 0; - int cpu = smp_processor_id(); + int cpu; /* * IRQ related structures are used for the following: * for each SGI interrupt ensure its mapped by GIC IRQ domain @@ -592,9 +592,12 @@ static int xlnx_event_init_sgi(struct platform_device *pdev) sgi_fwspec.param[0] = sgi_num; virq_sgi = irq_create_fwspec_mapping(&sgi_fwspec); + cpu = get_cpu(); per_cpu(cpu_number1, cpu) = cpu; ret = request_percpu_irq(virq_sgi, xlnx_event_handler, "xlnx_event_mgmt", &cpu_number1); + put_cpu(); + WARN_ON(ret); if (ret) { irq_dispose_mapping(virq_sgi); diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index d3a23b1c2..61bf00dfe 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index d65f047b6..1179a1115 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -166,10 +166,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) int scr; u8 cdm = 0; u32 speed; - u8 bits_per_word; /* Start with the generic configuration for this device. */ - bits_per_word = spi->bits_per_word; speed = spi->max_speed_hz; /* @@ -177,9 +175,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) * the transfer to overwrite the generic configuration with zeros. */ if (t) { - if (t->bits_per_word) - bits_per_word = t->bits_per_word; - if (t->speed_hz) speed = min(t->speed_hz, spi->max_speed_hz); } diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 51ceaa485..ec3a4939e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -137,14 +137,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) diff --git a/drivers/spmi/spmi-mtk-pmif.c b/drivers/spmi/spmi-mtk-pmif.c index 01e8851e6..bf8c0d410 100644 --- a/drivers/spmi/spmi-mtk-pmif.c +++ b/drivers/spmi/spmi-mtk-pmif.c @@ -475,7 +475,7 @@ static int mtk_spmi_probe(struct platform_device *pdev) for (i = 0; i < arb->nclks; i++) arb->clks[i].id = pmif_clock_names[i]; - err = devm_clk_bulk_get(&pdev->dev, arb->nclks, arb->clks); + err = clk_bulk_get(&pdev->dev, arb->nclks, arb->clks); if (err) { dev_err(&pdev->dev, "Failed to get clocks: %d\n", err); goto err_put_ctrl; @@ -484,7 +484,7 @@ static int mtk_spmi_probe(struct platform_device *pdev) err = clk_bulk_prepare_enable(arb->nclks, arb->clks); if (err) { dev_err(&pdev->dev, "Failed to enable clocks: %d\n", err); - goto err_put_ctrl; + goto err_put_clks; } ctrl->cmd = pmif_arb_cmd; @@ -510,6 +510,8 @@ static int mtk_spmi_probe(struct platform_device *pdev) err_domain_remove: clk_bulk_disable_unprepare(arb->nclks, arb->clks); +err_put_clks: + clk_bulk_put(arb->nclks, arb->clks); err_put_ctrl: spmi_controller_put(ctrl); return err; @@ -521,6 +523,7 @@ static int mtk_spmi_remove(struct platform_device *pdev) struct pmif *arb = spmi_controller_get_drvdata(ctrl); clk_bulk_disable_unprepare(arb->nclks, arb->clks); + clk_bulk_put(arb->nclks, arb->clks); spmi_controller_remove(ctrl); spmi_controller_put(ctrl); return 0; diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index f177b20f0..ceba63213 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 301fe376a..13558cbd9 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 69a4c9581..7aec34c09 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -910,12 +910,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0686882bc..fb93d74c5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3627,6 +3627,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index c74eaf255..2f0f05259 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1345,11 +1345,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1431,42 +1461,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 338cb19de..163a89f84 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,14 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -583,7 +591,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -641,12 +649,20 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { - dev_warn(dev, "clock is not stable yet\n"); - } + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1274,7 +1290,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1337,6 +1353,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1345,13 +1364,27 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8767c504b..9d3b23730 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -859,7 +859,7 @@ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) + if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios(&kterm, @@ -876,7 +876,7 @@ int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) ret = -EFAULT; return ret; case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) + if (!checkpoint_restore_ns_capable(&init_user_ns)) return -EPERM; copy_termios_locked(real_tty, &kterm); if (user_termios_to_kernel_termios_1(&kterm, diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9fd4e9ed9..f3c25467e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6159,7 +6159,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba, false); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index ccdd525bd..2b8f98f07 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -826,7 +826,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); @@ -2537,11 +2541,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 7b20d2d5c..7242591b3 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -394,7 +394,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index d00ff98df..33ba30f79 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -418,15 +419,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -479,7 +485,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73..d72370c32 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c9..ceca4d839 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 38703781e..1283c427c 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 81c8f564c..d960a56b7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -47,12 +47,18 @@ #define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 #define USB_PRODUCT_TUSB8041_USB3 0x8140 #define USB_PRODUCT_TUSB8041_USB2 0x8142 -#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 -#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 +#define USB_VENDOR_MICROCHIP 0x0424 +#define USB_PRODUCT_USB4913 0x4913 +#define USB_PRODUCT_USB4914 0x4914 +#define USB_PRODUCT_USB4915 0x4915 +#define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND BIT(0) +#define HUB_QUIRK_DISABLE_AUTOSUSPEND BIT(1) +#define HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL BIT(2) #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ #define USB_PING_RESPONSE_TIME 400 /* ns */ +#define USB_REDUCE_FRAME_INTR_BINTERVAL 9 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -1904,6 +1910,14 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) usb_autopm_get_interface_no_resume(intf); } + if ((id->driver_info & HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL) && + desc->endpoint[0].desc.bInterval > USB_REDUCE_FRAME_INTR_BINTERVAL) { + desc->endpoint[0].desc.bInterval = + USB_REDUCE_FRAME_INTR_BINTERVAL; + /* Tell the HCD about the interrupt ep's new bInterval */ + usb_set_interface(hdev, 0, 0); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) { onboard_hub_create_pdevs(hdev, &hub->onboard_hub_devs); @@ -2375,17 +2389,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif @@ -5885,6 +5907,21 @@ static const struct usb_device_id hub_id_table[] = { .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB3, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4913, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4914, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_MICROCHIP, + .idProduct = USB_PRODUCT_USB4915, + .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, .bDeviceClass = USB_CLASS_HUB}, { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c4703f6b2..b134110cc 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2548,6 +2548,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* @@ -4583,15 +4588,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index f6f13e7f1..f4d8e80c4 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -66,7 +66,7 @@ out: int dwc3_host_init(struct dwc3 *dwc) { - struct property_entry props[4]; + struct property_entry props[5]; struct platform_device *xhci; int ret, irq; int prop_idx = 0; @@ -94,6 +94,8 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); + props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk"); + if (dwc->usb3_lpm_capable) props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index cb0a4e2cd..247cca46c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -511,6 +511,19 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, return min(val, 900U) / 8; } +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c) +{ + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) { + /* Reset the rw bit if gadget is not capable of it */ + if (!g->wakeup_capable && g->ops->set_remote_wakeup) { + WARN(c->cdev, "Clearing wakeup bit for config c.%d\n", + c->bConfigurationValue); + c->bmAttributes &= ~USB_CONFIG_ATT_WAKEUP; + } + } +} + static int config_buf(struct usb_configuration *config, enum usb_device_speed speed, void *buf, u8 type) { @@ -959,6 +972,11 @@ static int set_config(struct usb_composite_dev *cdev, power = min(power, 500U); else power = min(power, 900U); + + if (USB_CONFIG_ATT_WAKEUP & c->bmAttributes) + usb_gadget_set_remote_wakeup(gadget, 1); + else + usb_gadget_set_remote_wakeup(gadget, 0); done: if (power <= USB_SELF_POWER_VBUS_MAX_DRAW) usb_gadget_set_selfpowered(gadget); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 4dcf29577..b94aec622 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1376,6 +1376,9 @@ static int configfs_composite_bind(struct usb_gadget *gadget, if (gadget_is_otg(gadget)) c->descriptors = otg_desc; + /* Properly configure the bmAttributes wakeup bit */ + check_remote_wakeup_config(gadget, c); + cfg = container_of(c, struct config_usb_cfg, c); if (!list_empty(&cfg->string_list)) { i = 0; diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 7b9a4cf9b..d35f30a9c 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -544,21 +544,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bbb6ff6b1..5e78fcc63 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1340,7 +1340,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index c40f2ecbe..0edd9e53f 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -526,6 +526,33 @@ out: EXPORT_SYMBOL_GPL(usb_gadget_wakeup); /** + * usb_gadget_set_remote_wakeup - configures the device remote wakeup feature. + * @gadget:the device being configured for remote wakeup + * @set:value to be configured. + * + * set to one to enable remote wakeup feature and zero to disable it. + * + * returns zero on success, else negative errno. + */ +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ + int ret = 0; + + if (!gadget->ops->set_remote_wakeup) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = gadget->ops->set_remote_wakeup(gadget, set); + +out: + trace_usb_gadget_set_remote_wakeup(gadget, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_set_remote_wakeup); + +/** * usb_gadget_set_selfpowered - sets the device selfpowered feature. * @gadget:the device being declared as self-powered * diff --git a/drivers/usb/gadget/udc/trace.h b/drivers/usb/gadget/udc/trace.h index abdbcb1ba..a5ed26fbc 100644 --- a/drivers/usb/gadget/udc/trace.h +++ b/drivers/usb/gadget/udc/trace.h @@ -91,6 +91,11 @@ DEFINE_EVENT(udc_log_gadget, usb_gadget_wakeup, TP_ARGS(g, ret) ); +DEFINE_EVENT(udc_log_gadget, usb_gadget_set_remote_wakeup, + TP_PROTO(struct usb_gadget *g, int ret), + TP_ARGS(g, ret) +); + DEFINE_EVENT(udc_log_gadget, usb_gadget_set_selfpowered, TP_PROTO(struct usb_gadget *g, int ret), TP_ARGS(g, ret) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c9438dc56..b387d39bf 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -301,6 +301,9 @@ static int xhci_plat_probe(struct platform_device *pdev) if (device_property_read_bool(tmpdev, "quirk-broken-port-ped")) xhci->quirks |= XHCI_BROKEN_PORT_PED; + if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk")) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 1239e06df..239b5edee 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2363,6 +2363,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2378,9 +2381,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: @@ -2388,8 +2395,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2409,6 +2417,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2417,6 +2428,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2801,17 +2820,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2823,9 +2876,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 1354310cb..fc25a5b09 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1570,6 +1570,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 32e6d19f7..a327f8bc5 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,7 +19,9 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -46,6 +48,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -71,7 +76,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -133,7 +138,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -155,7 +160,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -170,7 +175,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -187,15 +192,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -337,6 +345,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; @@ -351,6 +360,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -365,8 +376,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f1d7a5a86..b3e60b384 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4adef9259..c0a0cca65 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index b1e844bf3..703a9c563 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */ + {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805d..12cf9940e 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -180,6 +180,13 @@ static int slave_configure(struct scsi_device *sdev) sdev->use_192_bytes_for_3f = 1; /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device * is write-protected. Now that we tell the sd driver diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index de3836412..ed22053b3 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -879,6 +879,13 @@ static int uas_slave_configure(struct scsi_device *sdev) sdev->guess_capacity = 1; /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device * is write-protected. Now that we tell the sd driver diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dc2dea376..0695ee54f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -831,7 +831,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) clear_bit(EVENT_PENDING, &con->ucsi->flags); + mutex_lock(&ucsi->ppm_lock); ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 217355f1f..48130d636 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -73,9 +75,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(ACK_PENDING, &ua->flags); + else + set_bit(COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +91,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(ACK_PENDING, &ua->flags); + else + clear_bit(COMMAND_PENDING, &ua->flags); return ret; } @@ -119,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -142,8 +212,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } @@ -151,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -180,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 0f19d502f..dfab5b742 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -99,7 +99,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -161,7 +161,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -175,17 +175,18 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) sizeof(*bitmap->bitmap), PAGE_SIZE); /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) @@ -246,7 +247,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -301,7 +302,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } @@ -405,6 +406,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -413,6 +415,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2eea08029..61c72e62a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2588,12 +2588,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index fa205be94..14498a0d1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2397,11 +2397,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2435,13 +2433,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index b7818b652..a7b63c475 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 1c197c3f9..fe8996461 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1475,6 +1475,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index bb1122909..843f9f8e3 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -255,6 +255,7 @@ static struct watchdog_device wdt_dev = { static int __init it87_wdt_init(void) { u8 chip_rev; + u8 ctrl; int rc; rc = superio_enter(); @@ -313,7 +314,18 @@ static int __init it87_wdt_init(void) superio_select(GPIO); superio_outb(WDT_TOV1, WDTCFG); - superio_outb(0x00, WDTCTRL); + + switch (chip_type) { + case IT8784_ID: + case IT8786_ID: + ctrl = superio_inb(WDTCTRL); + ctrl &= 0x08; + superio_outb(ctrl, WDTCTRL); + break; + default: + superio_outb(0x00, WDTCTRL); + } + superio_exit(); if (timeout < 1 || timeout > max_units * 60) { diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 00f8e3499..96b96516c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq) unsigned int cpu = cpu_from_irq(irq); struct xenbus_device *dev; - xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; @@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq) } xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } xen_free_irq(irq); diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 940e5e9e8..335451309 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/dma-buf.h> +#include <linux/dma-direct.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> @@ -50,7 +51,7 @@ struct gntdev_dmabuf { /* Number of pages this buffer has. */ int nr_pages; - /* Pages of this buffer. */ + /* Pages of this buffer (only for dma-buf export). */ struct page **pages; }; @@ -484,7 +485,7 @@ out: /* DMA buffer import support. */ static int -dmabuf_imp_grant_foreign_access(struct page **pages, u32 *refs, +dmabuf_imp_grant_foreign_access(unsigned long *gfns, u32 *refs, int count, int domid) { grant_ref_t priv_gref_head; @@ -507,7 +508,7 @@ dmabuf_imp_grant_foreign_access(struct page **pages, u32 *refs, } gnttab_grant_foreign_access_ref(cur_ref, domid, - xen_page_to_gfn(pages[i]), 0); + gfns[i], 0); refs[i] = cur_ref; } @@ -529,7 +530,6 @@ static void dmabuf_imp_end_foreign_access(u32 *refs, int count) static void dmabuf_imp_free_storage(struct gntdev_dmabuf *gntdev_dmabuf) { - kfree(gntdev_dmabuf->pages); kfree(gntdev_dmabuf->u.imp.refs); kfree(gntdev_dmabuf); } @@ -549,12 +549,6 @@ static struct gntdev_dmabuf *dmabuf_imp_alloc_storage(int count) if (!gntdev_dmabuf->u.imp.refs) goto fail; - gntdev_dmabuf->pages = kcalloc(count, - sizeof(gntdev_dmabuf->pages[0]), - GFP_KERNEL); - if (!gntdev_dmabuf->pages) - goto fail; - gntdev_dmabuf->nr_pages = count; for (i = 0; i < count; i++) @@ -576,7 +570,8 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, struct dma_buf *dma_buf; struct dma_buf_attachment *attach; struct sg_table *sgt; - struct sg_page_iter sg_iter; + struct sg_dma_page_iter sg_iter; + unsigned long *gfns; int i; dma_buf = dma_buf_get(fd); @@ -624,26 +619,31 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, gntdev_dmabuf->u.imp.sgt = sgt; - /* Now convert sgt to array of pages and check for page validity. */ + gfns = kcalloc(count, sizeof(*gfns), GFP_KERNEL); + if (!gfns) { + ret = ERR_PTR(-ENOMEM); + goto fail_unmap; + } + + /* + * Now convert sgt to array of gfns without accessing underlying pages. + * It is not allowed to access the underlying struct page of an sg table + * exported by DMA-buf, but since we deal with special Xen dma device here + * (not a normal physical one) look at the dma addresses in the sg table + * and then calculate gfns directly from them. + */ i = 0; - for_each_sgtable_page(sgt, &sg_iter, 0) { - struct page *page = sg_page_iter_page(&sg_iter); - /* - * Check if page is valid: this can happen if we are given - * a page from VRAM or other resources which are not backed - * by a struct page. - */ - if (!pfn_valid(page_to_pfn(page))) { - ret = ERR_PTR(-EINVAL); - goto fail_unmap; - } + for_each_sgtable_dma_page(sgt, &sg_iter, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); + unsigned long pfn = bfn_to_pfn(XEN_PFN_DOWN(dma_to_phys(dev, addr))); - gntdev_dmabuf->pages[i++] = page; + gfns[i++] = pfn_to_gfn(pfn); } - ret = ERR_PTR(dmabuf_imp_grant_foreign_access(gntdev_dmabuf->pages, + ret = ERR_PTR(dmabuf_imp_grant_foreign_access(gfns, gntdev_dmabuf->u.imp.refs, count, domid)); + kfree(gfns); if (IS_ERR(ret)) goto fail_end_access; diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index bc417da7e..633fe4f52 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -46,6 +46,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_free_inode(struct inode *inode); struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev); +void v9fs_set_netfs_context(struct inode *inode); int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, umode_t mode, dev_t rdev); void v9fs_evict_inode(struct inode *inode); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4d1a4a8d9..5e2657c1d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -250,7 +250,7 @@ void v9fs_free_inode(struct inode *inode) /* * Set parameters for the netfs library */ -static void v9fs_set_netfs_context(struct inode *inode) +void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); @@ -344,8 +344,6 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, err = -EINVAL; goto error; } - - v9fs_set_netfs_context(inode); error: return err; @@ -377,6 +375,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev) iput(inode); return ERR_PTR(err); } + v9fs_set_netfs_context(inode); return inode; } @@ -479,6 +478,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); return inode; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5cfa4b4f0..e15ad4683 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -130,6 +130,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode, 0); + v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index a484fa642..90f9b2a46 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -110,13 +110,14 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, { struct afs_volume *volume = NULL; struct rb_node *p; - int seq = 0; + int seq = 1; do { /* Unfortunately, rbtree walking doesn't give reliable results * under just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&cell->volume_lock, &seq); p = rcu_dereference_raw(cell->volumes.rb_node); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cf811b77e..6e2c967fa 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -478,8 +478,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, dire->u.name[0] == '.' && ctx->actor != afs_lookup_filldir && ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) + memcmp(dire->u.name, ".__afs", 6) == 0) { + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, diff --git a/fs/afs/server.c b/fs/afs/server.c index b5237206e..0bd2f5ba6 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -27,7 +27,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct afs_addr_list *alist; struct afs_server *server = NULL; unsigned int i; - int seq = 0, diff; + int seq = 1, diff; rcu_read_lock(); @@ -35,6 +35,7 @@ struct afs_server *afs_find_server(struct afs_net *net, if (server) afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq); server = NULL; + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_addr_lock, &seq); if (srx->transport.family == AF_INET6) { @@ -90,7 +91,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu { struct afs_server *server = NULL; struct rb_node *p; - int diff, seq = 0; + int diff, seq = 1; _enter("%pU", uuid); @@ -102,7 +103,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu if (server) afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq); server = NULL; - + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_lock, &seq); p = net->fs_servers.rb_node; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 1c9144e3e..a146d70ef 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -341,7 +341,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -349,7 +349,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { @@ -595,6 +595,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1476,7 +1483,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 08017b180..9f77565bd 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1318,6 +1318,7 @@ out: */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { + LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; @@ -1339,6 +1340,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { + u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1374,9 +1376,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) goto next; } + spin_lock(&space_info->lock); spin_lock(&block_group->lock); - if (block_group->reserved || block_group->pinned || - block_group->used || block_group->ro || + if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have @@ -1386,10 +1388,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + up_write(&space_info->groups_sem); + goto next; + } + + /* + * The block group may be unused but there may be space reserved + * accounting with the existence of that block group, that is, + * space_info->bytes_may_use was incremented by a task but no + * space was yet allocated from the block group by the task. + * That space may or may not be allocated, as we are generally + * pessimistic about space reservation for metadata as well as + * for data when using compression (as we reserve space based on + * the worst case, when data can't be compressed, and before + * actually attempting compression, before starting writeback). + * + * So check if the total space of the space_info minus the size + * of this block group is less than the used space of the + * space_info - if that's the case, then it means we have tasks + * that might be relying on the block group in order to allocate + * extents, and add back the block group to the unused list when + * we finish, so that we retry later in case no tasks ended up + * needing to allocate extents from the block group. + */ + used = btrfs_space_info_used(space_info, true); + if (space_info->total_bytes - block_group->length < used) { + /* + * Add a reference for the list, compensate for the ref + * drop under the "next" label for the + * fs_info->unused_bgs list. + */ + btrfs_get_block_group(block_group); + list_add_tail(&block_group->bg_list, &retry_list); + + trace_btrfs_skip_unused_block_group(block_group); + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); @@ -1513,12 +1554,16 @@ next: btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } + list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->unused_bgs); + spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 47a2dcbfe..bace40a00 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -241,6 +241,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) return (block_group->start + block_group->length); } +static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + + return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); +} + static inline bool btrfs_is_block_group_data_only( struct btrfs_block_group *block_group) { diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index f2bc5563c..63b7fa706 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -243,7 +243,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; - u64 csum_leaves; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); @@ -258,10 +257,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_insert_metadata_size(fs_info, - csum_leaves); + if (!(inode->flags & BTRFS_INODE_NODATASUM)) { + u64 csum_leaves; + + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); + } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent @@ -276,14 +277,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, +static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + if (inode->flags & BTRFS_INODE_NODATASUM) + csum_leaves = 0; + else + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); @@ -335,7 +342,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ - calc_inode_reservations(fs_info, num_bytes, disk_num_bytes, + calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); @@ -356,7 +363,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, spin_lock(&inode->lock); nr_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += disk_num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); @@ -390,7 +398,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 61e58066b..9c856a73d 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -740,6 +740,23 @@ leave: return ret; } +static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args) +{ + if (args->start.srcdevid == 0) { + if (memchr(args->start.srcdev_name, 0, + sizeof(args->start.srcdev_name)) == NULL) + return -ENAMETOOLONG; + } else { + args->start.srcdev_name[0] = 0; + } + + if (memchr(args->start.tgtdev_name, 0, + sizeof(args->start.tgtdev_name)) == NULL) + return -ENAMETOOLONG; + + return 0; +} + int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args) { @@ -752,10 +769,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, default: return -EINVAL; } - - if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || - args->start.tgtdev_name[0] == '\0') - return -EINVAL; + ret = btrfs_check_replace_dev_names(args); + if (ret < 0) + return ret; ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name, args->start.srcdevid, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 40152458e..5756edb37 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1643,12 +1643,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1662,8 +1662,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); @@ -1683,7 +1692,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1719,7 +1728,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1735,7 +1744,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1743,11 +1752,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7322af63c..24bddca86 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -65,7 +65,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82f92b565..f7f4bcc09 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3364,8 +3364,23 @@ out: unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); - /* Drop extent maps for the part of the extent we didn't write. */ - btrfs_drop_extent_map_range(inode, unwritten_start, end, false); + /* + * Drop extent maps for the part of the extent we didn't write. + * + * We have an exception here for the free_space_inode, this is + * because when we do btrfs_get_extent() on the free space inode + * we will search the commit root. If this is a new block group + * we won't find anything, and we will trip over the assert in + * writepage where we do ASSERT(em->block_start != + * EXTENT_MAP_HOLE). + * + * Theoretically we could also skip this for any NOCOW extent as + * we don't mess with the extent map tree in the NOCOW case, but + * for now simply skip this if we are the free space inode. + */ + if (!btrfs_is_free_space_inode(inode)) + btrfs_drop_extent_map_range(inode, unwritten_start, + end, false); /* * If the ordered extent had an IOERR or something else went @@ -10774,6 +10789,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE) return -EINVAL; + /* + * Compressed extents should always have checksums, so error out if we + * have a NOCOW file or inode was created while mounted with NODATASUM. + */ + if (inode->flags & BTRFS_INODE_NODATASUM) + return -EINVAL; + orig_count = iov_iter_count(from); /* The extent size must be sane. */ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8516c70b5..64b37afb7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -708,7 +708,7 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); @@ -4695,6 +4695,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 96ec9ccc2..b3472bf6b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1635,6 +1635,15 @@ out: return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1654,6 +1663,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4a4d65b5e..9f7ffd9ef 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6462,11 +6462,20 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret) goto out; } - if (sctx->cur_inode_last_extent < - sctx->cur_inode_size) { - ret = send_hole(sctx, sctx->cur_inode_size); - if (ret) + if (sctx->cur_inode_last_extent < sctx->cur_inode_size) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + sctx->cur_inode_size); + if (ret < 0) { goto out; + } else if (ret == 0) { + ret = send_hole(sctx, sctx->cur_inode_size); + if (ret < 0) + goto out; + } else { + /* Range is already a hole, skip. */ + ret = 0; + } } } if (need_truncate) { @@ -7852,7 +7861,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 60db4c3b8..b172091f4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1809,7 +1809,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6..f449f7340 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index aa4efcabb..5f4df9588 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 36052a362..57603782e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1391,7 +1391,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; - arg->xattr_buf = ci->i_xattrs.blob; + arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; @@ -1457,6 +1457,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); + ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } @@ -4597,12 +4598,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct inode *dir, int mds, int drop, int unless) { - struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); int force = 0; int ret; + /* This shouldn't happen */ + BUG_ON(!dir); + /* * force an record for the directory caps if we have a dentry lease. * this is racy (can't take i_ceph_lock and d_lock together), but it @@ -4612,14 +4615,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; - if (!dir) { - parent = dget(dentry->d_parent); - dir = d_inode(parent); - } spin_unlock(&dentry->d_lock); ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); - dput(parent); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 82874be94..da9fcf48a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3650,11 +3650,11 @@ static void handle_session(struct ceph_mds_session *session, if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) pr_info("mds%d reconnect success\n", session->s_mds); + session->s_features = features; if (session->s_state == CEPH_MDS_SESSION_OPEN) { pr_notice("mds%d is already opened\n", session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; - session->s_features = features; renewed_caps(mdsc, session, 0); if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 3fbabc98e..4a089d70e 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -379,10 +379,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index f7fcf7f08..ca4932e6f 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -194,10 +194,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) } /* - * This function walks through the snaprealm for an inode and returns the - * ceph_snap_realm for the first snaprealm that has quotas set (max_files, + * This function walks through the snaprealm for an inode and set the + * realmp with the first snaprealm that has quotas set (max_files, * max_bytes, or any, depending on the 'which_quota' argument). If the root is - * reached, return the root ceph_snap_realm instead. + * reached, set the realmp with the root ceph_snap_realm instead. * * Note that the caller is responsible for calling ceph_put_snap_realm() on the * returned realm. @@ -208,18 +208,19 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc) * this function will return -EAGAIN; otherwise, the snaprealms walk-through * will be restarted. */ -static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, - struct inode *inode, - enum quota_get_realm which_quota, - bool retry) +static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode, + enum quota_get_realm which_quota, + struct ceph_snap_realm **realmp, bool retry) { struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; struct inode *in; bool has_quota; + if (realmp) + *realmp = NULL; if (ceph_snap(inode) != CEPH_NOSNAP) - return NULL; + return 0; restart: realm = ceph_inode(inode)->i_snap_realm; @@ -245,7 +246,7 @@ restart: break; ceph_put_snap_realm(mdsc, realm); if (!retry) - return ERR_PTR(-EAGAIN); + return -EAGAIN; goto restart; } @@ -254,8 +255,11 @@ restart: iput(in); next = realm->parent; - if (has_quota || !next) - return realm; + if (has_quota || !next) { + if (realmp) + *realmp = realm; + return 0; + } ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); @@ -264,7 +268,7 @@ restart: if (realm) ceph_put_snap_realm(mdsc, realm); - return NULL; + return 0; } bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) @@ -272,6 +276,7 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_snap_realm *old_realm, *new_realm; bool is_same; + int ret; restart: /* @@ -281,9 +286,9 @@ restart: * dropped and we can then restart the whole operation. */ down_read(&mdsc->snap_rwsem); - old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true); - new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false); - if (PTR_ERR(new_realm) == -EAGAIN) { + get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true); + ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false); + if (ret == -EAGAIN) { up_read(&mdsc->snap_rwsem); if (old_realm) ceph_put_snap_realm(mdsc, old_realm); @@ -485,8 +490,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) bool is_updated = false; down_read(&mdsc->snap_rwsem); - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), - QUOTA_GET_MAX_BYTES, true); + get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, + &realm, true); up_read(&mdsc->snap_rwsem); if (!realm) return false; diff --git a/fs/dcache.c b/fs/dcache.c index 52e6d5fda..b09bc88db 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -787,12 +787,12 @@ static inline bool fast_dput(struct dentry *dentry) */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); - if (dentry->d_lockref.count > 1) { - dentry->d_lockref.count--; + if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; } - return false; + dentry->d_lockref.count--; + goto locked; } /* @@ -850,6 +850,7 @@ static inline bool fast_dput(struct dentry *dentry) * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ +locked: if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); return true; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 72f34f96d..2c797eb51 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -174,7 +174,7 @@ static LIST_HEAD(dlm_node_addrs); static DEFINE_SPINLOCK(dlm_node_addrs_spin); static struct listen_connection listen_con; -static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; +static struct sockaddr_storage dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; int dlm_allow_conn; @@ -398,7 +398,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, if (!sa_out) return 0; - if (dlm_local_addr[0]->ss_family == AF_INET) { + if (dlm_local_addr[0].ss_family == AF_INET) { struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; @@ -727,7 +727,7 @@ static void add_sock(struct socket *sock, struct connection *con) static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, int *addr_len) { - saddr->ss_family = dlm_local_addr[0]->ss_family; + saddr->ss_family = dlm_local_addr[0].ss_family; if (saddr->ss_family == AF_INET) { struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; in4_addr->sin_port = cpu_to_be16(port); @@ -1167,7 +1167,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) int i, addr_len, result = 0; for (i = 0; i < dlm_local_count; i++) { - memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); + memcpy(&localaddr, &dlm_local_addr[i], sizeof(localaddr)); make_sockaddr(&localaddr, port, &addr_len); if (!i) @@ -1187,7 +1187,7 @@ static int sctp_bind_addrs(struct socket *sock, uint16_t port) /* Get local addresses */ static void init_local(void) { - struct sockaddr_storage sas, *addr; + struct sockaddr_storage sas; int i; dlm_local_count = 0; @@ -1195,21 +1195,10 @@ static void init_local(void) if (dlm_our_addr(&sas, i)) break; - addr = kmemdup(&sas, sizeof(*addr), GFP_NOFS); - if (!addr) - break; - dlm_local_addr[dlm_local_count++] = addr; + memcpy(&dlm_local_addr[dlm_local_count++], &sas, sizeof(sas)); } } -static void deinit_local(void) -{ - int i; - - for (i = 0; i < dlm_local_count; i++) - kfree(dlm_local_addr[i]); -} - static struct writequeue_entry *new_writequeue_entry(struct connection *con) { struct writequeue_entry *entry; @@ -1575,7 +1564,7 @@ static void dlm_connect(struct connection *con) } /* Create a socket to communicate with */ - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) goto socket_err; @@ -1786,7 +1775,6 @@ void dlm_lowcomms_stop(void) foreach_conn(free_conn); srcu_read_unlock(&connections_srcu, idx); work_stop(); - deinit_local(); dlm_proto_ops = NULL; } @@ -1803,7 +1791,7 @@ static int dlm_listen_for_all(void) if (result < 0) return result; - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + result = sock_create_kern(&init_net, dlm_local_addr[0].ss_family, SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) { log_print("Can't create comms socket: %d", result); @@ -1842,7 +1830,7 @@ static int dlm_tcp_bind(struct socket *sock) /* Bind to our cluster-known address connecting to avoid * routing problems. */ - memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); + memcpy(&src_addr, &dlm_local_addr[0], sizeof(src_addr)); make_sockaddr(&src_addr, 0, &addr_len); result = kernel_bind(sock, (struct sockaddr *)&src_addr, @@ -1899,9 +1887,9 @@ static int dlm_tcp_listen_bind(struct socket *sock) int addr_len; /* Bind to our port */ - make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); + make_sockaddr(&dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0], - addr_len); + addr_len); } static const struct dlm_proto_ops dlm_tcp_ops = { @@ -1992,7 +1980,7 @@ int dlm_lowcomms_start(void) error = work_start(); if (error) - goto fail_local; + goto fail; dlm_allow_conn = 1; @@ -2022,8 +2010,6 @@ fail_listen: fail_proto_ops: dlm_allow_conn = 0; work_stop(); -fail_local: - deinit_local(); fail: return error; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c214fe098..55340ac61 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -76,6 +76,14 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) return ERR_PTR(-EXDEV); + + /* Reject dealing with casefold directories. */ + if (IS_CASEFOLDED(lower_inode)) { + pr_err_ratelimited("%s: Can't handle casefolded directory.\n", + __func__); + return ERR_PTR(-EREMOTE); + } + if (!igrab(lower_inode)) return ERR_PTR(-ESTALE); inode = iget5_locked(sb, (unsigned long)lower_inode, diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 9e4f47808..13bc60698 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -372,7 +372,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -389,12 +389,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -431,9 +432,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 26fa17009..c4a3187bd 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -21,6 +21,8 @@ struct z_erofs_decompress_req { }; struct z_erofs_decompressor { + int (*config)(struct super_block *sb, struct erofs_super_block *dsb, + void *data, int size); int (*decompress)(struct z_erofs_decompress_req *rq, struct page **pagepool); char *name; @@ -93,6 +95,8 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); /* prototypes for specific algorithms */ +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size); int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); #endif diff --git a/fs/erofs/data.c b/fs/erofs/data.c index b32801d71..9d20e5d23 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -440,4 +440,5 @@ const struct file_operations erofs_file_fops = { .read_iter = erofs_file_read_iter, .mmap = erofs_file_mmap, .splice_read = generic_file_splice_read, + .get_unmapped_area = thp_get_unmapped_area, }; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 0cfad7437..1eefa4411 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -24,11 +24,11 @@ struct z_erofs_lz4_decompress_ctx { unsigned int oend; }; -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int size) +static int z_erofs_load_lz4_config(struct super_block *sb, + struct erofs_super_block *dsb, void *data, int size) { struct erofs_sb_info *sbi = EROFS_SB(sb); + struct z_erofs_lz4_cfgs *lz4 = data; u16 distance; if (lz4) { @@ -374,17 +374,71 @@ static struct z_erofs_decompressor decompressors[] = { .name = "interlaced" }, [Z_EROFS_COMPRESSION_LZ4] = { + .config = z_erofs_load_lz4_config, .decompress = z_erofs_lz4_decompress, .name = "lz4" }, #ifdef CONFIG_EROFS_FS_ZIP_LZMA [Z_EROFS_COMPRESSION_LZMA] = { + .config = z_erofs_load_lzma_config, .decompress = z_erofs_lzma_decompress, .name = "lzma" }, #endif }; +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret = 0; + + if (!erofs_sb_has_compr_cfgs(sbi)) { + sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4; + return z_erofs_load_lz4_config(sb, dsb, NULL, 0); + } + + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "unidentified algorithms %x, please upgrade kernel", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EOPNOTSUPP; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + alg = 0; + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &buf, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + break; + } + + if (alg >= ARRAY_SIZE(decompressors) || + !decompressors[alg].config) { + erofs_err(sb, "algorithm %d isn't enabled on this kernel", + alg); + ret = -EOPNOTSUPP; + } else { + ret = decompressors[alg].config(sb, + dsb, data, size); + } + + kfree(data); + if (ret) + break; + } + erofs_put_metabuf(&buf); + return ret; +} + int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 49addc345..970464c4b 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -72,10 +72,10 @@ int z_erofs_lzma_init(void) } int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) + struct erofs_super_block *dsb, void *data, int size) { static DEFINE_MUTEX(lzma_resize_mutex); + struct z_erofs_lzma_cfgs *lzma = data; unsigned int dict_size, i; struct z_erofs_lzma *strm, *head = NULL; int err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index d8d09fc3e..79a7a5815 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -471,6 +471,8 @@ struct erofs_map_dev { /* data.c */ extern const struct file_operations erofs_file_fops; +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, struct inode *inode, @@ -565,9 +567,7 @@ void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); int erofs_try_to_free_cached_page(struct page *page); -int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len); +int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} @@ -575,36 +575,14 @@ static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} -static inline int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lz4_cfgs *lz4, int len) -{ - if (lz4 || dsb->u1.lz4_max_distance) { - erofs_err(sb, "lz4 algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA int z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); -int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size); #else static inline int z_erofs_lzma_init(void) { return 0; } static inline int z_erofs_lzma_exit(void) { return 0; } -static inline int z_erofs_load_lzma_config(struct super_block *sb, - struct erofs_super_block *dsb, - struct z_erofs_lzma_cfgs *lzma, int size) { - if (lzma) { - erofs_err(sb, "lzma algorithm isn't enabled"); - return -EINVAL; - } - return 0; -} #endif /* !CONFIG_EROFS_FS_ZIP */ /* flags for erofs_fscache_register_cookie() */ diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 0dc347210..e8ccaa761 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -137,24 +137,24 @@ static void *find_target_block_classic(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; + } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index bd8bf8fc2..f2647126c 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -126,8 +126,8 @@ static bool check_layout_compatibility(struct super_block *sb, #ifdef CONFIG_EROFS_FS_ZIP /* read variable-sized metadata, offset will be aligned by 4-byte */ -static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, - erofs_off_t *offset, int *lengthp) +void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, + erofs_off_t *offset, int *lengthp) { u8 *buffer, *ptr; int len, i, cnt; @@ -159,64 +159,15 @@ static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, } return buffer; } - -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - unsigned int algs, alg; - erofs_off_t offset; - int size, ret = 0; - - sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); - if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { - erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", - sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); - return -EINVAL; - } - - offset = EROFS_SUPER_OFFSET + sbi->sb_size; - alg = 0; - for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { - void *data; - - if (!(algs & 1)) - continue; - - data = erofs_read_metadata(sb, &buf, &offset, &size); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - - switch (alg) { - case Z_EROFS_COMPRESSION_LZ4: - ret = z_erofs_load_lz4_config(sb, dsb, data, size); - break; - case Z_EROFS_COMPRESSION_LZMA: - ret = z_erofs_load_lzma_config(sb, dsb, data, size); - break; - default: - DBG_BUGON(1); - ret = -EFAULT; - } - kfree(data); - if (ret) - break; - } - erofs_put_metabuf(&buf); - return ret; -} #else -static int erofs_load_compr_cfgs(struct super_block *sb, - struct erofs_super_block *dsb) +static int z_erofs_parse_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) { - if (dsb->u1.available_compr_algs) { - erofs_err(sb, "try to load compressed fs when compression is disabled"); - return -EINVAL; - } - return 0; + if (!dsb->u1.available_compr_algs) + return 0; + + erofs_err(sb, "compression disabled, unable to mount compressed EROFS"); + return -EOPNOTSUPP; } #endif @@ -398,10 +349,7 @@ static int erofs_read_superblock(struct super_block *sb) } /* parse on-disk compression configurations */ - if (erofs_sb_has_compr_cfgs(sbi)) - ret = erofs_load_compr_cfgs(sb, dsb); - else - ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); + ret = z_erofs_parse_cfgs(sb, dsb); if (ret < 0) goto out; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index cf9a2fa7f..47e71964e 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -652,7 +652,6 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) if (ztailpacking) { pcl->obj.index = 0; /* which indicates ztailpacking */ - pcl->pageofs_in = erofs_blkoff(map->m_pa); pcl->tailpacking_size = map->m_plen; } else { pcl->obj.index = map->m_pa >> PAGE_SHIFT; @@ -852,6 +851,7 @@ repeat: get_page(fe->map.buf.page); WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, fe->map.buf.page); + fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } else { /* bind cache first when cached decompression is preferred */ diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0337b70b2..abcded1ac 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -610,7 +610,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff; + unsigned int lclusterbits, endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; @@ -700,17 +700,20 @@ static int z_erofs_do_map_blocks(struct inode *inode, err = -EFSCORRUPTED; goto unmap_out; } - if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER) - map->m_algorithmformat = - Z_EROFS_COMPRESSION_INTERLACED; - else - map->m_algorithmformat = - Z_EROFS_COMPRESSION_SHIFTED; - } else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { - map->m_algorithmformat = vi->z_algorithmtype[1]; + afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ? + Z_EROFS_COMPRESSION_INTERLACED : + Z_EROFS_COMPRESSION_SHIFTED; } else { - map->m_algorithmformat = vi->z_algorithmtype[0]; + afmt = m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 ? + vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; + if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { + erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + afmt, vi->nid); + err = -EFSCORRUPTED; + goto unmap_out; + } } + map->m_algorithmformat = afmt; if ((flags & EROFS_GET_BLOCKS_FIEMAP) || ((flags & EROFS_GET_BLOCKS_READMORE) && diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c648a493f..3204bd33e 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include <linux/sched.h> #include <linux/cred.h> -#define dprintk(fmt, args...) do{}while(0) +#define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("get_parent of %lu failed, err %ld\n", + dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aa5aadd70..67af684e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2271,30 +2271,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, } /* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - -/* * ext4_ext_rm_idx: * removes index from the index block. */ @@ -4064,6 +4040,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4181,22 +4220,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 33be702d6..6a3e27771 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,7 +831,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, @@ -1118,6 +1118,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1767,11 +1785,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1785,21 +1798,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1824,9 +1847,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } @@ -2153,6 +2176,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2160,6 +2186,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -2188,12 +2215,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2226,6 +2251,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); @@ -6444,13 +6470,15 @@ static int ext4_try_to_trim_range(struct super_block *sb, __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) { - ext4_grpblk_t next, count, free_count; + ext4_grpblk_t next, count, free_count, last, origin_start; bool set_trimmed = false; void *bitmap; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; - if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + if (start == 0 && max >= last) set_trimmed = true; + origin_start = start; start = max(e4b->bd_info->bb_first_free, start); count = 0; free_count = 0; @@ -6459,7 +6487,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) start = mb_find_next_zero_bit(bitmap, max + 1, start); if (start > max) break; - next = mb_find_next_bit(bitmap, max + 1, start); + + next = mb_find_next_bit(bitmap, last + 1, start); + if (origin_start == 0 && next >= last) + set_trimmed = true; if ((next - start) >= minblocks) { int ret = ext4_trim_extent(sb, start, next - start, e4b); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index dedc9d445..8e3ff150b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -621,6 +621,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -675,7 +676,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -685,9 +686,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9833ab6db..f2ed15af7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -231,17 +231,24 @@ struct ext4_new_flex_group_data { in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ + ext4_group_t resize_bg; /* number of allocated + new_group_data */ ext4_group_t count; /* number of groups in @groups */ }; /* + * Avoiding memory allocation failures due to too many groups added each time. + */ +#define MAX_RESIZE_BG 16384 + +/* * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of * @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. */ -static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size) { struct ext4_new_flex_group_data *flex_gd; @@ -249,17 +256,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) - goto out2; - flex_gd->count = flexbg_size; + if (unlikely(flexbg_size > MAX_RESIZE_BG)) + flex_gd->resize_bg = MAX_RESIZE_BG; + else + flex_gd->resize_bg = flexbg_size; - flex_gd->groups = kmalloc_array(flexbg_size, + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); if (flex_gd->groups == NULL) goto out2; - flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16), + flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16), GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; @@ -296,7 +304,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) */ static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - int flexbg_size) + unsigned int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t start_blk; @@ -397,12 +405,12 @@ next_group: group = group_data[0].group; printk(KERN_DEBUG "EXT4-fs: adding a flex group with " - "%d groups, flexbg size is %d:\n", flex_gd->count, + "%u groups, flexbg size is %u:\n", flex_gd->count, flexbg_size); for (i = 0; i < flex_gd->count; i++) { ext4_debug( - "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n", + "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n", ext4_bg_has_super(sb, group + i) ? "normal" : "no-super", group + i, group_data[i].blocks_count, @@ -1623,8 +1631,7 @@ exit: static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) + ext4_fsblk_t n_blocks_count) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -1648,7 +1655,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); - last_group = group | (flexbg_size - 1); + last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group) last_group = n_group; @@ -2010,8 +2017,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; unsigned long last_update_time = 0; - int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int err = 0; int meta_bg; + unsigned int flexbg_size = ext4_flex_bg_size(sbi); /* See if the device is actually as big as what was requested */ bh = ext4_sb_bread(sb, n_blocks_count - 1, 0); @@ -2152,8 +2160,7 @@ retry: /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ - while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, - flexbg_size)) { + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4cb58e8d6..967262c37 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -55,6 +55,7 @@ struct f2fs_compress_ops { int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); + bool (*is_level_valid)(int level); }; static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -322,11 +323,21 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool lz4_is_level_valid(int lvl) +{ +#ifdef CONFIG_F2FS_FS_LZ4HC + return !lvl || (lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL); +#else + return lvl == 0; +#endif +} + static const struct f2fs_compress_ops f2fs_lz4_ops = { .init_compress_ctx = lz4_init_compress_ctx, .destroy_compress_ctx = lz4_destroy_compress_ctx, .compress_pages = lz4_compress_pages, .decompress_pages = lz4_decompress_pages, + .is_level_valid = lz4_is_level_valid, }; #endif @@ -490,6 +501,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool zstd_is_level_valid(int lvl) +{ + return lvl >= zstd_min_clevel() && lvl <= zstd_max_clevel(); +} + static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_compress_ctx = zstd_init_compress_ctx, .destroy_compress_ctx = zstd_destroy_compress_ctx, @@ -497,6 +513,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_decompress_ctx = zstd_init_decompress_ctx, .destroy_decompress_ctx = zstd_destroy_decompress_ctx, .decompress_pages = zstd_decompress_pages, + .is_level_valid = zstd_is_level_valid, }; #endif @@ -555,6 +572,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } +bool f2fs_is_compress_level_valid(int alg, int lvl) +{ + const struct f2fs_compress_ops *cops = f2fs_cops[alg]; + + if (cops->is_level_valid) + return cops->is_level_valid(lvl); + + return lvl == 0; +} + static mempool_t *compress_page_pool; static int num_compress_pages = 512; module_param(num_compress_pages, uint, 0444); @@ -1026,8 +1053,10 @@ static void set_cluster_dirty(struct compress_ctx *cc) int i; for (i = 0; i < cc->cluster_size; i++) - if (cc->rpages[i]) + if (cc->rpages[i]) { set_page_dirty(cc->rpages[i]); + set_page_private_gcing(cc->rpages[i]); + } } static int prepare_compress_overwrite(struct compress_ctx *cc, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5c76ba764..e5a9498b8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4219,6 +4219,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +bool f2fs_is_compress_level_valid(int alg, int lvl); int f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); @@ -4283,6 +4284,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) /* not support compression */ return false; } +static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } static inline struct page *f2fs_compress_control_page(struct page *page) { WARN_ON_ONCE(1); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fd22854db..46e4960a9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1326,6 +1326,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); + set_page_private_gcing(pdst); f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); @@ -4037,6 +4038,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) f2fs_bug_on(F2FS_I_SB(inode), !page); set_page_dirty(page); + set_page_private_gcing(page); f2fs_put_page(page, 1); f2fs_put_page(page, 0); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dea95b48b..53a6487f9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -683,7 +683,16 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - f2fs_reserve_new_block(&dn); + do { + err = f2fs_reserve_new_block(&dn); + if (err == -ENOSPC) { + f2fs_bug_on(sbi, 1); + break; + } + } while (err && + IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + if (err) + goto err; continue; } @@ -691,12 +700,14 @@ retry_dn: if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { if (src == NULL_ADDR) { - err = f2fs_reserve_new_block(&dn); - while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) + do { err = f2fs_reserve_new_block(&dn); - /* We should not get -ENOSPC */ - f2fs_bug_on(sbi, err); + if (err == -ENOSPC) { + f2fs_bug_on(sbi, 1); + break; + } + } while (err && + IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); if (err) goto err; } @@ -887,6 +898,8 @@ skip: if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && f2fs_sb_has_blkzoned(sbi)) { err = f2fs_fix_curseg_write_pointer(sbi); + if (!err) + err = f2fs_check_write_pointer(sbi); ret = err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3805162dc..0c0d0671f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -628,7 +628,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) { + if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { f2fs_info(sbi, "invalid lz4hc compress level: %d", level); return -EINVAL; } @@ -666,7 +666,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < zstd_min_clevel() || level > zstd_max_clevel()) { + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8eea709e3..4fe4b3393 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -123,6 +123,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -164,10 +165,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; @@ -1350,6 +1361,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1394,11 +1406,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4d56f6081..4462274e3 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -63,10 +63,10 @@ */ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); -static int dbBackSplit(dmtree_t * tp, int leafno); -static int dbJoin(dmtree_t * tp, int leafno, int newval); -static void dbAdjTree(dmtree_t * tp, int leafno, int newval); +static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl); +static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl); +static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl); +static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl); static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level); static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); @@ -2103,7 +2103,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * system. */ if (dp->tree.stree[word] == NOFREE) - dbBackSplit((dmtree_t *) & dp->tree, word); + dbBackSplit((dmtree_t *)&dp->tree, word, false); dbAllocBits(bmp, dp, blkno, nblocks); } @@ -2189,7 +2189,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the binary system of the leaves if need be. */ dbSplit(tp, word, BUDMIN, - dbMaxBud((u8 *) & dp->wmap[word])); + dbMaxBud((u8 *)&dp->wmap[word]), false); word += 1; } else { @@ -2229,7 +2229,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * system of the leaves to reflect the current * allocation (size). */ - dbSplit(tp, word, size, NOFREE); + dbSplit(tp, word, size, NOFREE, false); /* get the number of dmap words handled */ nw = BUDSIZE(size, BUDMIN); @@ -2336,7 +2336,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* update the leaf for this dmap word. */ rc = dbJoin(tp, word, - dbMaxBud((u8 *) & dp->wmap[word])); + dbMaxBud((u8 *)&dp->wmap[word]), false); if (rc) return rc; @@ -2369,7 +2369,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* update the leaf. */ - rc = dbJoin(tp, word, size); + rc = dbJoin(tp, word, size, false); if (rc) return rc; @@ -2521,16 +2521,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) * that it is at the front of a binary buddy system. */ if (oldval == NOFREE) { - rc = dbBackSplit((dmtree_t *) dcp, leafno); + rc = dbBackSplit((dmtree_t *)dcp, leafno, true); if (rc) { release_metapage(mp); return rc; } oldval = dcp->stree[ti]; } - dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); + dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true); } else { - rc = dbJoin((dmtree_t *) dcp, leafno, newval); + rc = dbJoin((dmtree_t *) dcp, leafno, newval, true); if (rc) { release_metapage(mp); return rc; @@ -2561,7 +2561,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) */ if (alloc) { dbJoin((dmtree_t *) dcp, leafno, - oldval); + oldval, true); } else { /* the dbJoin() above might have * caused a larger binary buddy system @@ -2571,9 +2571,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) */ if (dcp->stree[ti] == NOFREE) dbBackSplit((dmtree_t *) - dcp, leafno); + dcp, leafno, true); dbSplit((dmtree_t *) dcp, leafno, - dcp->budmin, oldval); + dcp->budmin, oldval, true); } /* release the buffer and return the error. @@ -2621,7 +2621,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ -static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) +static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl) { int budsz; int cursz; @@ -2643,7 +2643,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) while (cursz >= splitsz) { /* update the buddy's leaf with its new value. */ - dbAdjTree(tp, leafno ^ budsz, cursz); + dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl); /* on to the next size and buddy. */ @@ -2655,7 +2655,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) /* adjust the dmap tree to reflect the specified leaf's new * value. */ - dbAdjTree(tp, leafno, newval); + dbAdjTree(tp, leafno, newval, is_ctl); } @@ -2686,7 +2686,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ -static int dbBackSplit(dmtree_t * tp, int leafno) +static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) { int budsz, bud, w, bsz, size; int cursz; @@ -2737,7 +2737,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) * system in two. */ cursz = leaf[bud] - 1; - dbSplit(tp, bud, cursz, cursz); + dbSplit(tp, bud, cursz, cursz, is_ctl); break; } } @@ -2765,7 +2765,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) * * RETURN VALUES: none */ -static int dbJoin(dmtree_t * tp, int leafno, int newval) +static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int budsz, buddy; s8 *leaf; @@ -2820,12 +2820,12 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) if (leafno < buddy) { /* leafno is the left buddy. */ - dbAdjTree(tp, buddy, NOFREE); + dbAdjTree(tp, buddy, NOFREE, is_ctl); } else { /* buddy is the left buddy and becomes * leafno. */ - dbAdjTree(tp, leafno, NOFREE); + dbAdjTree(tp, leafno, NOFREE, is_ctl); leafno = buddy; } @@ -2838,7 +2838,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* update the leaf value. */ - dbAdjTree(tp, leafno, newval); + dbAdjTree(tp, leafno, newval, is_ctl); return 0; } @@ -2859,15 +2859,20 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) * * RETURN VALUES: none */ -static void dbAdjTree(dmtree_t * tp, int leafno, int newval) +static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl) { int lp, pp, k; - int max; + int max, size; + + size = is_ctl ? CTLTREESIZE : TREESIZE; /* pick up the index of the leaf for this leafno. */ lp = leafno + le32_to_cpu(tp->dmt_leafidx); + if (WARN_ON_ONCE(lp >= size || lp < 0)) + return; + /* is the current value the same as the old value ? if so, * there is nothing to do. */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 92b7c5334..031d8f570 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -633,6 +633,11 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { index = base + (lim >> 1); + if (stbl[index] < 0) { + rc = -EIO; + goto out; + } + if (p->header.flag & BT_LEAF) { /* uppercase leaf name to compare */ cmp = @@ -1970,7 +1975,7 @@ static int dtSplitRoot(tid_t tid, do { f = &rp->slot[fsi]; fsi = f->next; - } while (fsi != -1); + } while (fsi >= 0); f->next = n; } diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 6ed2e1d4c..ac42f8ee5 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -2179,6 +2179,9 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* get the ag and iag numbers for this iag. */ agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); + if (agno >= MAXAG || agno < 0) + return -EIO; + iagno = le32_to_cpu(iagp->iagnum); /* check if this is the last free extent within the diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 48d1f70f7..21d8d4a5c 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -172,15 +172,15 @@ int jfs_mount(struct super_block *sb) } jfs_info("jfs_mount: ipimap:0x%p", ipimap); - /* map further access of per fileset inodes by the fileset inode */ - sbi->ipimap = ipimap; - /* initialize fileset inode allocation map */ if ((rc = diMount(ipimap))) { jfs_err("jfs_mount: diMount failed w/rc = %d", rc); goto err_ipimap; } + /* map further access of per fileset inodes by the fileset inode */ + sbi->ipimap = ipimap; + return rc; /* diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 44842e6cf..a00e11ebf 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -669,6 +669,18 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, { struct kernfs_node *kn; + if (parent->mode & S_ISGID) { + /* this code block imitates inode_init_owner() for + * kernfs + */ + + if (parent->iattr) + gid = parent->iattr->ia_gid; + + if (flags & KERNFS_DIR) + mode |= S_ISGID; + } + kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 284b019cb..b72023a6b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, *filp = file; /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_start = (loff_t)lock->lock_start; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 9c1aa7544..4e30f3c50 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - if (file->f_file[O_RDONLY]) - error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_RDONLY]; + if (lock->fl.fl_file) + error = vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); - if (file->f_file[O_WRONLY]) - error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, + lock->fl.fl_file = file->f_file[O_WRONLY]; + if (lock->fl.fl_file) + error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, &lock->fl, NULL); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; @@ -697,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - mode = lock_to_openmode(&lock->fl); - vfs_cancel_lock(block->b_file->f_file[mode], - &block->b_call->a_args.lock.fl); + struct file_lock *fl = &block->b_call->a_args.lock.fl; + + mode = lock_to_openmode(fl); + vfs_cancel_lock(block->b_file->f_file[mode], fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index e35c05e27..32784f508 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ mode = lock_to_openmode(&lock->fl); + lock->fl.fl_flags = FL_POSIX; lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 3515f17ea..e3b6229e7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = inode->i_flctx; + struct file_lock_context *flctx = locks_inode_context(inode); if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; diff --git a/fs/locks.c b/fs/locks.c index 1047ab2b1..7d0918b8f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; @@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) void *owner; void (*func)(void); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; @@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2562,7 +2562,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2635,7 +2635,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); + ctx = locks_inode_context(locks_inode(filp)); if (!ctx) return; @@ -2682,7 +2682,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return false; @@ -2863,7 +2863,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = smp_load_acquire(&inode->i_flctx); + ctx = locks_inode_context(inode); if (!ctx) return; diff --git a/fs/namespace.c b/fs/namespace.c index 29a8d90dd..1533550f7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4172,10 +4172,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. - * If this is a detached mount make sure it has an anonymous mount - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. + * + * If this mount doesn't have a parent it's most often simply a + * detached mount with an anonymous mount namespace. IOW, something + * that's simply not attached yet. But there are apparently also users + * that do change mount properties on the rootfs itself. That obviously + * neither has a parent nor is it a detached mount so we cannot + * unconditionally check for detached mounts. */ - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 3fa77ad72..c8a57cfde 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,10 +9,10 @@ #define _TRACE_NFS4_H #include <linux/tracepoint.h> -#include <trace/events/sunrpc_base.h> +#include <trace/misc/sunrpc.h> -#include <trace/events/fs.h> -#include <trace/events/nfs.h> +#include <trace/misc/fs.h> +#include <trace/misc/nfs.h> #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8c6cc5867..642f69218 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,9 +11,9 @@ #include <linux/tracepoint.h> #include <linux/iversion.h> -#include <trace/events/fs.h> -#include <trace/events/nfs.h> -#include <trace/events/sunrpc_base.h> +#include <trace/misc/fs.h> +#include <trace/misc/nfs.h> +#include <trace/misc/sunrpc.h> #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f41d24b54..6a0606668 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -667,8 +667,10 @@ static int nfs_writepage_locked(struct page *page, int err; if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) + NFS_SERVER(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f6a2fd301..7c441f2bd 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,6 +8,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -26,19 +27,29 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. - Support for NFS version 2 (RFC 1094) is always available when + Support for NFS version 3 (RFC 1813) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2_ACL - bool +config NFSD_V2 + bool "NFS server support for NFS version 2 (DEPRECATED)" depends on NFSD + default n + help + NFSv2 (RFC 1094) was the first publicly-released version of NFS. + Unless you are hosting ancient (1990's era) NFS clients, you don't + need this. + + If unsure, say N. + +config NFSD_V2_ACL + bool "NFS server support for the NFSv2 ACL protocol extension" + depends on NFSD_V2 config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" depends on NFSD - select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 805c06d5f..6fffc8f03 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o nfsxdr.o \ +nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o \ stats.o filecache.o nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index e7e6e78d9..01d7fd108 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -12,6 +12,7 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 2455dc8be..1ed2f691e 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,6 +9,7 @@ #include "nfsd.h" #include "blocklayoutxdr.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ee0e3aba4..d03f7f6a8 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); -__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 5cb8cce15..697acf5c3 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,7 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Open file cache. + * The NFSD open file cache. * * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> + * + * An nfsd_file object is a per-file collection of open state that binds + * together: + * - a struct file * + * - a user credential + * - a network namespace + * - a read-ahead context + * - monitoring for writeback errors + * + * nfsd_file objects are reference-counted. Consumers acquire a new + * object via the nfsd_file_acquire API. They manage their interest in + * the acquired object, and hence the object's reference count, via + * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file + * object: + * + * * non-garbage-collected: When a consumer wants to precisely control + * the lifetime of a file's open state, it acquires a non-garbage- + * collected nfsd_file. The final nfsd_file_put releases the open + * state immediately. + * + * * garbage-collected: When a consumer does not control the lifetime + * of open state, it acquires a garbage-collected nfsd_file. The + * final nfsd_file_put allows the open state to linger for a period + * during which it may be re-used. */ #include <linux/hash.h> @@ -186,12 +211,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = { static void nfsd_file_schedule_laundrette(void) { - if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || - test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) - return; - - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -589,7 +611,8 @@ static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - nfsd_file_schedule_laundrette(); + if (list_lru_count(&nfsd_file_lru)) + nfsd_file_schedule_laundrette(); } static unsigned long diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index 070f90ed0..3ca530444 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,6 +15,7 @@ #include "flexfilelayoutxdr.h" #include "pnfs.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba32..51a4b7885 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39989c14c..4eae2c5af 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); +} + +static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, + size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); +} + /* * nfs_cb_opnum4 * @@ -329,6 +340,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, } /* + * CB_RECALLANY4args + * + * struct CB_RECALLANY4args { + * uint32_t craa_objects_to_keep; + * bitmap4 craa_type_mask; + * }; + */ +static void +encode_cb_recallany4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) +{ + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); + encode_uint32(xdr, ra->ra_keep); + encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); + hdr->nops++; +} + +/* * CB_SEQUENCE4args * * struct CB_SEQUENCE4args { @@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static void +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfsd4_cb_recall_any *ra; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recallany4args(xdr, &hdr, ra); + encode_cb_nops(&hdr); +} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } +/* + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects + */ +static int +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); + return status; +} + #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e70a1a299..5e9809aff 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,7 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" +#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a9105e95b..ba53cd89e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); - goto out; - } - status = nfs_ok; -out: + read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) { - dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + if (status) return status; - } } err = fh_want_write(&cstate->current_fh); if (err) @@ -1170,10 +1163,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + if (status) return status; - } write->wr_how_written = write->wr_stable_how; @@ -1204,17 +1195,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); + if (status) goto out; - } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) { - dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); + if (status) goto out_put_src; - } /* fix up for NFS-specific error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1935,10 +1922,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) { - dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + if (status != nfs_ok) return status; - } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1994,10 +1979,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) { - dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + if (status) return status; - } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index faecdbfa0..b9d694ec2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,7 +44,9 @@ #include <linux/jhash.h> #include <linux/string_helpers.h> #include <linux/fsnotify.h> +#include <linux/rhashtable.h> #include <linux/nfs_ssc.h> + #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); +static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -588,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - might_lock(&state_lock); - - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { - hlist_del_rcu(&fi->fi_hash); - spin_unlock(&state_lock); + if (refcount_dec_and_test(&fi->fi_ref)) { + nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -717,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -/* hash table for nfs4_file */ -#define FILE_HASH_BITS 8 -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) +static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; -static unsigned int file_hashval(struct svc_fh *fh) -{ - struct inode *inode = d_inode(fh->fh_dentry); - - /* XXX: why not (here & in file cache) use inode? */ - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); -} +static const struct rhashtable_params nfs4_file_rhash_params = { + .key_len = sizeof_field(struct nfs4_file, fi_inode), + .key_offset = offsetof(struct nfs4_file, fi_inode), + .head_offset = offsetof(struct nfs4_file, fi_rlist), -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + /* + * Start with a single page hash table to reduce resizing churn + * on light workloads. + */ + .min_size = 256, + .automatic_shrinking = true, +}; /* * Check if courtesy clients have conflicting access and resolve it if possible @@ -1367,6 +1368,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + trace_nfsd_stid_revoke(&dp->dl_stid); + if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; @@ -1831,13 +1834,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int mem, i; + int i; - BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) - + sizeof(struct nfsd4_session) > PAGE_SIZE); - mem = numslots * sizeof(struct nfsd4_slot *); + BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) + > PAGE_SIZE); - new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ @@ -2143,6 +2145,7 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); + kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2870,6 +2873,37 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; +static int +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + trace_nfsd_cb_recall_any_done(cb, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; + default: + return 1; + } +} + +static void +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); +} + +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { + .done = nfsd4_cb_recall_any_done, + .release = nfsd4_cb_recall_any_release, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2907,6 +2941,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); + if (!clp->cl_ra) { + free_client(clp); + return NULL; + } + clp->cl_ra_time = 0; + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, + NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -4276,11 +4318,9 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, - struct nfs4_file *fp) -{ - lockdep_assert_held(&state_lock); +static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +{ refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); @@ -4298,7 +4338,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif - hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4363,25 +4402,27 @@ out: } static unsigned long -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int cnt; + int count; struct nfsd_net *nn = container_of(shrink, struct nfsd_net, nfsd_client_shrinker); - cnt = atomic_read(&nn->nfsd_courtesy_clients); - if (cnt > 0) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); - return (unsigned long)cnt; + count = atomic_read(&nn->nfsd_courtesy_clients); + if (!count) + count = atomic_long_read(&num_delegations); + if (count) + queue_work(laundry_wq, &nn->nfsd_shrinker_work); + return (unsigned long)count; } static unsigned long -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) { return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4403,16 +4444,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -4683,71 +4714,80 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) nfs4_put_stid(&last->st_stid); } -/* search file_hashtbl[] for file */ -static struct nfs4_file * -find_file_locked(struct svc_fh *fh, unsigned int hashval) +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_lookup(const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; + struct nfs4_file *fi; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - return fp; + rcu_read_lock(); + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) { + rcu_read_unlock(); + return fi; + } } } + rcu_read_unlock(); return NULL; } -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, - unsigned int hashval) +/* + * On hash insertion, identify entries with the same inode but + * distinct filehandles. They will all be on the list returned + * by rhltable_lookup(). + * + * inode->i_lock prevents racing insertions from adding an entry + * for the same inode/fhp pair twice. + */ +static noinline_for_stack struct nfs4_file * +nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) { - struct nfs4_file *fp; + struct inode *inode = d_inode(fhp->fh_dentry); + struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; + struct nfs4_file *fi; + int err; - spin_lock(&state_lock); - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, - lockdep_is_held(&state_lock)) { - if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { - if (refcount_inc_not_zero(&fp->fi_ref)) - ret = fp; - } else if (d_inode(fh->fh_dentry) == fp->fi_inode) - fp->fi_aliased = alias_found = true; - } - if (likely(ret == NULL)) { - nfsd4_init_file(fh, hashval, new); - new->fi_aliased = alias_found; - ret = new; + rcu_read_lock(); + spin_lock(&inode->i_lock); + + list = rhltable_lookup(&nfs4_file_rhltable, &inode, + nfs4_file_rhash_params); + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { + if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { + if (refcount_inc_not_zero(&fi->fi_ref)) + ret = fi; + } else + fi->fi_aliased = alias_found = true; } - spin_unlock(&state_lock); - return ret; -} + if (ret) + goto out_unlock; -static struct nfs4_file * find_file(struct svc_fh *fh) -{ - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); + nfsd4_file_init(fhp, new); + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, + nfs4_file_rhash_params); + if (err) + goto out_unlock; - rcu_read_lock(); - fp = find_file_locked(fh, hashval); + new->fi_aliased = alias_found; + ret = new; + +out_unlock: + spin_unlock(&inode->i_lock); rcu_read_unlock(); - return fp; + return ret; } -static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) +static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) { - struct nfs4_file *fp; - unsigned int hashval = file_hashval(fh); - - rcu_read_lock(); - fp = find_file_locked(fh, hashval); - rcu_read_unlock(); - if (fp) - return fp; - - return insert_file(new, fh, hashval); + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, + nfs4_file_rhash_params); } /* @@ -4760,9 +4800,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(current_fh); + fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) @@ -4774,7 +4815,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) static bool nfsd4_deleg_present(const struct inode *inode) { - struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + struct file_lock_context *ctx = locks_inode_context(inode); return ctx && !list_empty_careful(&ctx->flc_lease); } @@ -4908,10 +4949,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5499,12 +5538,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -5656,7 +5696,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, current_fh); + fp = nfsd4_file_hash_insert(open->op_file, current_fh); + if (unlikely(!fp)) + return nfserr_jukebox; if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -5933,7 +5975,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { nf = stp->st_stid.sc_file; - ctx = nf->fi_inode->i_flctx; + ctx = locks_inode_context(nf->fi_inode); if (!ctx) continue; if (locks_owner_has_blockers(ctx, lo)) @@ -6161,17 +6203,63 @@ laundromat_main(struct work_struct *laundry) } static void -courtesy_client_reaper(struct work_struct *reaper) +courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; - struct delayed_work *dwork = to_delayed_work(reaper); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, - nfsd_shrinker_work); nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } +static void +deleg_reaper(struct nfsd_net *nn) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + struct list_head cblist; + + INIT_LIST_HEAD(&cblist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state != NFSD4_ACTIVE || + list_empty(&clp->cl_delegations) || + atomic_read(&clp->cl_delegs_in_recall) || + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || + (ktime_get_boottime_seconds() - + clp->cl_ra_time < 5)) { + continue; + } + list_add(&clp->cl_ra_cblist, &cblist); + + /* release in nfsd4_cb_recall_any_release */ + atomic_inc(&clp->cl_rpc_users); + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + clp->cl_ra_time = ktime_get_boottime_seconds(); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&cblist)) { + clp = list_first_entry(&cblist, struct nfs4_client, + cl_ra_cblist); + list_del_init(&clp->cl_ra_cblist); + clp->cl_ra->ra_keep = 0; + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); + trace_nfsd_cb_recall_any(clp->cl_ra); + nfsd4_run_cb(&clp->cl_ra->ra_cb); + } +} + +static void +nfsd4_state_shrinker_worker(struct work_struct *work) +{ + struct nfsd_net *nn = container_of(work, struct nfsd_net, + nfsd_shrinker_work); + + courtesy_client_reaper(nn); + deleg_reaper(nn); +} + static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -6936,6 +7024,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + trace_nfsd_deleg_return(stateid); wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: @@ -7736,18 +7825,20 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = locks_inode(nf->nf_file); - flctx = inode->i_flctx; + flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); @@ -7759,7 +7850,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7769,10 +7861,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7808,10 +7898,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { @@ -7992,11 +8085,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8068,10 +8170,16 @@ nfs4_state_start(void) { int ret; - ret = nfsd4_create_callback_queue(); + ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; + ret = nfsd4_create_callback_queue(); + if (ret) { + rhltable_destroy(&nfs4_file_rhltable); + return ret; + } + set_max_delegations(); return 0; } @@ -8083,6 +8191,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8111,6 +8221,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 89a579be0..597f14a80 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, - struct nfsd4_access *access) + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) return nfserr_bad_xdr; return nfs_ok; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) return nfserr_bad_xdr; return nfsd4_decode_stateid4(argp, &close->cl_stateid); @@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) return nfserr_bad_xdr; if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) @@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit } static __be32 -nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; __be32 *p, status; memset(create, 0, sizeof(*create)); @@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } static inline __be32 -nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_delegreturn *dr = &u->delegreturn; return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; memset(getattr, 0, sizeof(*getattr)); return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, ARRAY_SIZE(getattr->ga_bmval)); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; memset(link, 0, sizeof(*link)); return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); } @@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; memset(lock, 0, sizeof(*lock)); if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) return nfserr_bad_xdr; @@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; memset(lockt, 0, sizeof(*lockt)); if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) return nfserr_bad_xdr; @@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; __be32 status; if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) @@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_lookup *lookup = &u->lookup; return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); } @@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; __be32 status; u32 dummy; @@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) } static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *open_conf = &u->open_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *open_down = &u->open_downgrade; __be32 status; memset(open_down, 0, sizeof(*open_down)); @@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_putfh *putfh = &u->putfh; __be32 *p; if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) @@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; __be32 status; memset(read, 0, sizeof(*read)); @@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; __be32 status; memset(readdir, 0, sizeof(*readdir)); @@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; __be32 status; memset(rename, 0, sizeof(*rename)); @@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + clientid_t *clientid = &u->renew; return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; secinfo->si_exp = NULL; return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; __be32 status; memset(setattr, 0, sizeof(*setattr)); @@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_setclientid *setclientid = &u->setclientid; __be32 *p, status; memset(setclientid, 0, sizeof(*setclientid)); @@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; __be32 status; if (argp->minorversion >= 1) @@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_verify *verify = &u->verify; __be32 *p, status; memset(verify, 0, sizeof(*verify)); @@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; __be32 status; status = nfsd4_decode_stateid4(argp, &write->wr_stateid); @@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; __be32 status; if (argp->minorversion >= 1) @@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel return nfs_ok; } -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; memset(bc, 0, sizeof(*bc)); if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) return nfserr_bad_xdr; return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); } -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; u32 use_conn_in_rdma_mode; __be32 status; @@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; __be32 status; memset(exid, 0, sizeof(*exid)); @@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; __be32 status; memset(sess, 0, sizeof(*sess)); @@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) + union nfsd4_op_u *u) { + struct nfsd4_destroy_session *destroy_session = &u->destroy_session; return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) + union nfsd4_op_u *u) { + struct nfsd4_free_stateid *free_stateid = &u->free_stateid; return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; __be32 status; memset(gdev, 0, sizeof(*gdev)); @@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; __be32 *p, status; memset(lcp, 0, sizeof(*lcp)); @@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; __be32 status; memset(lgp, 0, sizeof(*lgp)); @@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; memset(lrp, 0, sizeof(*lrp)); if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) return nfserr_bad_xdr; @@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, #endif /* CONFIG_NFSD_PNFS */ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) return nfserr_bad_xdr; @@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; __be32 *p, status; status = nfsd4_decode_sessionid4(argp, &seq->sessionid); @@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; __be32 status; u32 i; @@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta } static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_clientid *dc) + union nfsd4_op_u *u) { + struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; return nfsd4_decode_clientid4(argp, &dc->clientid); } static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - struct nfsd4_reclaim_complete *rc) + union nfsd4_op_u *u) { + struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) return nfserr_bad_xdr; return nfs_ok; @@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) + union nfsd4_op_u *u) { + struct nfsd4_fallocate *fallocate = &u->allocate; __be32 status; status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); @@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, } static __be32 -nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; __be32 status; @@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) static __be32 nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; memset(cn, 0, sizeof(*cn)); @@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; os->count = 0; os->status = 0; return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 status; status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); @@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { + struct nfsd4_clone *clone = &u->clone; __be32 status; status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); @@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; @@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; u32 flags, maxcount, size; __be32 status; @@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; u32 maxcount; memset(listxattrs, 0, sizeof(*listxattrs)); @@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); static const nfsd4_dec nfsd4_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + [OP_ACCESS] = nfsd4_decode_access, + [OP_CLOSE] = nfsd4_decode_close, + [OP_COMMIT] = nfsd4_decode_commit, + [OP_CREATE] = nfsd4_decode_create, + [OP_DELEGPURGE] = nfsd4_decode_notsupp, + [OP_DELEGRETURN] = nfsd4_decode_delegreturn, + [OP_GETATTR] = nfsd4_decode_getattr, + [OP_GETFH] = nfsd4_decode_noop, + [OP_LINK] = nfsd4_decode_link, + [OP_LOCK] = nfsd4_decode_lock, + [OP_LOCKT] = nfsd4_decode_lockt, + [OP_LOCKU] = nfsd4_decode_locku, + [OP_LOOKUP] = nfsd4_decode_lookup, + [OP_LOOKUPP] = nfsd4_decode_noop, + [OP_NVERIFY] = nfsd4_decode_verify, + [OP_OPEN] = nfsd4_decode_open, + [OP_OPENATTR] = nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, + [OP_PUTFH] = nfsd4_decode_putfh, + [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = nfsd4_decode_noop, + [OP_READ] = nfsd4_decode_read, + [OP_READDIR] = nfsd4_decode_readdir, + [OP_READLINK] = nfsd4_decode_noop, + [OP_REMOVE] = nfsd4_decode_remove, + [OP_RENAME] = nfsd4_decode_rename, + [OP_RENEW] = nfsd4_decode_renew, + [OP_RESTOREFH] = nfsd4_decode_noop, + [OP_SAVEFH] = nfsd4_decode_noop, + [OP_SECINFO] = nfsd4_decode_secinfo, + [OP_SETATTR] = nfsd4_decode_setattr, + [OP_SETCLIENTID] = nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = nfsd4_decode_verify, + [OP_WRITE] = nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, + [OP_LAYOUTGET] = nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_decode_sequence, + [OP_SET_SSV] = nfsd4_decode_notsupp, + [OP_TEST_STATEID] = nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, - [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, - [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, - [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + [OP_ALLOCATE] = nfsd4_decode_fallocate, + [OP_COPY] = nfsd4_decode_copy, + [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = nfsd4_decode_fallocate, + [OP_IO_ADVISE] = nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, + [OP_READ_PLUS] = nfsd4_decode_read, + [OP_SEEK] = nfsd4_decode_seek, + [OP_WRITE_SAME] = nfsd4_decode_notsupp, + [OP_CLONE] = nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, - [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, - [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, + [OP_GETXATTR] = nfsd4_decode_getxattr, + [OP_SETXATTR] = nfsd4_decode_setxattr, + [OP_LISTXATTRS] = nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_decode_removexattr, }; static inline bool @@ -3643,8 +3702,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_access *access = &u->access; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3656,8 +3717,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ return 0; } -static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3673,8 +3736,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_close *close = &u->close; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); @@ -3682,8 +3747,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_commit *commit = &u->commit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3696,8 +3763,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_create *create = &u->create; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3710,8 +3779,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = resp->xdr; @@ -3720,8 +3791,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct svc_fh **fhpp = &u->getfh; struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; @@ -3775,8 +3848,10 @@ again: } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lock *lock = &u->lock; struct xdr_stream *xdr = resp->xdr; if (!nfserr) @@ -3788,8 +3863,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_lockt *lockt = &u->lockt; struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) @@ -3798,8 +3875,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_locku *locku = &u->locku; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); @@ -3807,8 +3886,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_link *link = &u->link; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3821,8 +3902,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open *open = &u->open; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -3915,16 +3998,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_confirm *oc = &u->open_confirm; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_open_downgrade *od = &u->open_downgrade; struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); @@ -4023,8 +4110,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { + struct nfsd4_read *read = &u->read; bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; @@ -4065,8 +4153,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readlink *readlink = &u->readlink; __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; @@ -4110,8 +4200,10 @@ out_err: } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; @@ -4201,8 +4293,10 @@ err_no_verf: } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_remove *remove = &u->remove; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4214,8 +4308,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ } static __be32 -nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_rename *rename = &u->rename; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4297,8 +4393,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); @@ -4306,8 +4403,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo_no_name *secinfo) + union nfsd4_op_u *u) { + struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); @@ -4318,8 +4416,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. */ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setattr *setattr = &u->setattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4342,8 +4442,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_setclientid *scd = &u->setclientid; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4366,8 +4468,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) { + struct nfsd4_write *write = &u->write; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4383,8 +4487,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_exchange_id *exid) + union nfsd4_op_u *u) { + struct nfsd4_exchange_id *exid = &u->exchange_id; struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; @@ -4461,8 +4566,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_create_session *sess) + union nfsd4_op_u *u) { + struct nfsd4_create_session *sess = &u->create_session; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4514,8 +4620,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_sequence *seq) + union nfsd4_op_u *u) { + struct nfsd4_sequence *seq = &u->sequence; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4537,8 +4644,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_test_stateid *test_stateid) + union nfsd4_op_u *u) { + struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4558,8 +4666,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getdeviceinfo *gdev) + union nfsd4_op_u *u) { + struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; @@ -4611,8 +4720,9 @@ toosmall: static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutget *lgp) + union nfsd4_op_u *u) { + struct nfsd4_layoutget *lgp = &u->layoutget; struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4638,8 +4748,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutcommit *lcp) + union nfsd4_op_u *u) { + struct nfsd4_layoutcommit *lcp = &u->layoutcommit; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4659,8 +4770,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_layoutreturn *lrp) + union nfsd4_op_u *u) { + struct nfsd4_layoutreturn *lrp = &u->layoutreturn; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4745,8 +4857,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy *copy) + union nfsd4_op_u *u) { + struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, @@ -4762,8 +4875,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_offload_status *os) + union nfsd4_op_u *u) { + struct nfsd4_offload_status *os = &u->offload_status; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4777,156 +4891,83 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof, - loff_t *pos) + struct nfsd4_read *read) { - struct xdr_stream *xdr = resp->xdr; + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); struct file *file = read->rd_nf->nf_file; - int starting_len = xdr->buf->len; - loff_t hole_pos; - __be32 nfserr; - __be32 *p, tmp; - __be64 tmp64; - - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); - if (hole_pos > read->rd_offset) - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount; + __be32 nfserr, *p; /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_resource; + return nfserr_io; + if (resp->xdr->buf->page_len && splice_ok) { + WARN_ON_ONCE(splice_ok); + return nfserr_serverfault; + } - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); - if (read->rd_vlen < 0) - return nfserr_resource; + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (file->f_op->splice_read && splice_ok) + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - - tmp = htonl(NFS4_CONTENT_DATA); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp64 = cpu_to_be64(read->rd_offset); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); - tmp = htonl(*maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); - - tmp = xdr_zero; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, - xdr_pad_size(*maxcount)); - return nfs_ok; -} - -static __be32 -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, - struct nfsd4_read *read, - unsigned long *maxcount, u32 *eof) -{ - struct file *file = read->rd_nf->nf_file; - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); - loff_t f_size = i_size_read(file_inode(file)); - unsigned long count; - __be32 *p; - - if (data_pos == -ENXIO) - data_pos = f_size; - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); - count = data_pos - read->rd_offset; - /* Content type, offset, byte count */ - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); - if (!p) - return nfserr_resource; - - *p++ = htonl(NFS4_CONTENT_HOLE); + *p++ = cpu_to_be32(NFS4_CONTENT_DATA); p = xdr_encode_hyper(p, read->rd_offset); - p = xdr_encode_hyper(p, count); + *p = cpu_to_be32(read->rd_length); - *eof = (read->rd_offset + count) >= f_size; - *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) + union nfsd4_op_u *u) { - unsigned long maxcount, count; + struct nfsd4_read *read = &u->read; + struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - struct file *file; int starting_len = xdr->buf->len; - int last_segment = xdr->buf->len; - int segments = 0; - __be32 *p, tmp; - bool is_data; - loff_t pos; - u32 eof; + u32 segments = 0; + __be32 *p; if (nfserr) return nfserr; - file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_resource; + return nfserr_io; xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); - if (eof) + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); + if (read->rd_eof) goto out; - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); - is_data = pos > read->rd_offset; - - while (count > 0 && !eof) { - maxcount = count; - if (is_data) - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, - segments == 0 ? &pos : NULL); - else - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); - if (nfserr) - goto out; - count -= maxcount; - read->rd_offset += maxcount; - is_data = !is_data; - last_segment = xdr->buf->len; - segments++; - } - -out: - if (nfserr && segments == 0) + nfserr = nfsd4_encode_read_plus_data(resp, read); + if (nfserr) { xdr_truncate_encode(xdr, starting_len); - else { - if (nfserr) { - xdr_truncate_encode(xdr, last_segment); - nfserr = nfs_ok; - eof = 0; - } - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + return nfserr; } + segments++; + +out: + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(segments); return nfserr; } static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_copy_notify *cn) + union nfsd4_op_u *u) { + struct nfsd4_copy_notify *cn = &u->copy_notify; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -4960,8 +5001,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_seek *seek) + union nfsd4_op_u *u) { + struct nfsd4_seek *seek = &u->seek; __be32 *p; p = xdr_reserve_space(resp->xdr, 4 + 8); @@ -4972,7 +5014,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *p) { return nfserr; } @@ -5023,8 +5066,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_getxattr *getxattr) + union nfsd4_op_u *u) { + struct nfsd4_getxattr *getxattr = &u->getxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p, err; @@ -5047,8 +5091,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_setxattr *setxattr) + union nfsd4_op_u *u) { + struct nfsd4_setxattr *setxattr = &u->setxattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5088,8 +5133,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_listxattrs *listxattrs) + union nfsd4_op_u *u) { + struct nfsd4_listxattrs *listxattrs = &u->listxattrs; struct xdr_stream *xdr = resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; @@ -5199,8 +5245,9 @@ out: static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_removexattr *removexattr) + union nfsd4_op_u *u) { + struct nfsd4_removexattr *removexattr = &u->removexattr; struct xdr_stream *xdr = resp->xdr; __be32 *p; @@ -5212,7 +5259,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); +typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5220,93 +5267,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * done in the decoding phase. */ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, - [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, - [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, - [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, - [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, - [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, - [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, - [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, - [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, - [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, - [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, - [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, - [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, - [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_READ] = (nfsd4_enc)nfsd4_encode_read, - [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, - [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, - [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, - [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, - [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, - [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, - [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, - [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, - [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, - [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ACCESS] = nfsd4_encode_access, + [OP_CLOSE] = nfsd4_encode_close, + [OP_COMMIT] = nfsd4_encode_commit, + [OP_CREATE] = nfsd4_encode_create, + [OP_DELEGPURGE] = nfsd4_encode_noop, + [OP_DELEGRETURN] = nfsd4_encode_noop, + [OP_GETATTR] = nfsd4_encode_getattr, + [OP_GETFH] = nfsd4_encode_getfh, + [OP_LINK] = nfsd4_encode_link, + [OP_LOCK] = nfsd4_encode_lock, + [OP_LOCKT] = nfsd4_encode_lockt, + [OP_LOCKU] = nfsd4_encode_locku, + [OP_LOOKUP] = nfsd4_encode_noop, + [OP_LOOKUPP] = nfsd4_encode_noop, + [OP_NVERIFY] = nfsd4_encode_noop, + [OP_OPEN] = nfsd4_encode_open, + [OP_OPENATTR] = nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, + [OP_PUTFH] = nfsd4_encode_noop, + [OP_PUTPUBFH] = nfsd4_encode_noop, + [OP_PUTROOTFH] = nfsd4_encode_noop, + [OP_READ] = nfsd4_encode_read, + [OP_READDIR] = nfsd4_encode_readdir, + [OP_READLINK] = nfsd4_encode_readlink, + [OP_REMOVE] = nfsd4_encode_remove, + [OP_RENAME] = nfsd4_encode_rename, + [OP_RENEW] = nfsd4_encode_noop, + [OP_RESTOREFH] = nfsd4_encode_noop, + [OP_SAVEFH] = nfsd4_encode_noop, + [OP_SECINFO] = nfsd4_encode_secinfo, + [OP_SETATTR] = nfsd4_encode_setattr, + [OP_SETCLIENTID] = nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, + [OP_VERIFY] = nfsd4_encode_noop, + [OP_WRITE] = nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = nfsd4_encode_noop, + [OP_FREE_STATEID] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, - [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = nfsd4_encode_noop, + [OP_GETDEVICELIST] = nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, + [OP_LAYOUTGET] = nfsd4_encode_noop, + [OP_LAYOUTRETURN] = nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, - [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = nfsd4_encode_sequence, + [OP_SET_SSV] = nfsd4_encode_noop, + [OP_TEST_STATEID] = nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, - [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, - [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, - [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, - [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_ALLOCATE] = nfsd4_encode_noop, + [OP_COPY] = nfsd4_encode_copy, + [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = nfsd4_encode_noop, + [OP_IO_ADVISE] = nfsd4_encode_noop, + [OP_LAYOUTERROR] = nfsd4_encode_noop, + [OP_LAYOUTSTATS] = nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, + [OP_READ_PLUS] = nfsd4_encode_read_plus, + [OP_SEEK] = nfsd4_encode_seek, + [OP_WRITE_SAME] = nfsd4_encode_noop, + [OP_CLONE] = nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, - [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, - [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, + [OP_GETXATTR] = nfsd4_encode_getxattr, + [OP_SETXATTR] = nfsd4_encode_setxattr, + [OP_LISTXATTRS] = nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = nfsd4_encode_removexattr, }; /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 573de0d49..76a60e7a7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { +#ifdef CONFIG_NFSD_V2 case 2: +#endif case 3: nfsd_vers(nn, num, cmd); break; @@ -601,7 +603,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - return -EINVAL; + /* Ignore requests to disable non-existent versions */ + if (cmd == NFSD_SET) + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -1448,9 +1452,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1460,8 +1462,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1477,7 +1477,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 09726c5b9..fa0144a74 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -64,8 +64,7 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern const struct svc_version nfsd_version2, nfsd_version3, - nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; @@ -505,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -514,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index c3ae6414f..513e028b0 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, struct svc_fh *src) +fh_copy(struct svc_fh *dst, const struct svc_fh *src) { WARN_ON(src->fh_dentry); @@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src) } static inline void -fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) { dst->fh_size = src->fh_size; memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); @@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; @@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) return true; } -static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +static inline bool fh_fsid_match(const struct knfsd_fh *fh1, + const struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 82b3ddeac..9744443c3 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; - -/* - * Map errnos to NFS errnos. - */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_stale, -EBADF }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, -#ifdef EDQUOT - { nfserr_dquot, -EDQUOT }, -#endif - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EREMOTEIO }, - { nfserr_stale, -EOPENSTALE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, -ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7695ebd2..0c7563605 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { +# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, +# endif +# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, +# endif }; #define NFSD_ACL_MINVERS 2 @@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { +#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, +#endif [3] = &nfsd_version3, #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, @@ -1065,7 +1071,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); - if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e2daef3cc..e94634d30 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,6 +368,7 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) +#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -411,6 +412,10 @@ struct nfs4_client { unsigned int cl_state; atomic_t cl_delegs_in_recall; + + struct nfsd4_cb_recall_any *cl_ra; + time64_t cl_ra_time; + struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -536,16 +541,13 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. - * - * These are hashed by filehandle in the file_hashtbl, which is protected by - * the global state_lock spinlock. */ struct nfs4_file { refcount_t fi_ref; struct inode * fi_inode; bool fi_aliased; spinlock_t fi_lock; - struct hlist_node fi_hash; /* hash on fi_fhandle */ + struct rhlist_head fi_rlist; struct list_head fi_stateids; union { struct list_head fi_delegations; @@ -639,6 +641,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, + NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 132335011..4183819ea 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,9 +9,12 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> +#include <linux/sunrpc/xprt.h> +#include <trace/misc/nfs.h> #include "export.h" #include "nfsfh.h" +#include "xdr4.h" #define NFSD_TRACE_PROC_RES_FIELDS \ __field(unsigned int, netns_ino) \ @@ -604,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_return); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, @@ -636,6 +640,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); +TRACE_DEFINE_ENUM(NFS4_OPEN_STID); +TRACE_DEFINE_ENUM(NFS4_LOCK_STID); +TRACE_DEFINE_ENUM(NFS4_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); +TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); +TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); + +#define show_stid_type(x) \ + __print_flags(x, "|", \ + { NFS4_OPEN_STID, "OPEN" }, \ + { NFS4_LOCK_STID, "LOCK" }, \ + { NFS4_DELEG_STID, "DELEG" }, \ + { NFS4_CLOSED_STID, "CLOSED" }, \ + { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ + { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ + { NFS4_LAYOUT_STID, "LAYOUT" }) + +DECLARE_EVENT_CLASS(nfsd_stid_class, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_STRUCT__entry( + __field(unsigned long, sc_type) + __field(int, sc_count) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + + __entry->sc_type = stid->sc_type; + __entry->sc_count = refcount_read(&stid->sc_count); + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", + __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->sc_count, show_stid_type(__entry->sc_type) + ) +); + +#define DEFINE_STID_EVENT(name) \ +DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ + TP_PROTO(const struct nfs4_stid *stid), \ + TP_ARGS(stid)) + +DEFINE_STID_EVENT(revoke); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -1436,6 +1495,32 @@ TRACE_EVENT(nfsd_cb_offload, __entry->fh_hash, __entry->count, __entry->status) ); +TRACE_EVENT(nfsd_cb_recall_any, + TP_PROTO( + const struct nfsd4_cb_recall_any *ra + ), + TP_ARGS(ra), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, keep) + __field(unsigned long, bmval0) + __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot; + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id; + __entry->keep = ra->ra_keep; + __entry->bmval0 = ra->ra_bmval[0]; + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr, + ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->keep, show_rca_mask(__entry->bmval0) + ) +); + DECLARE_EVENT_CLASS(nfsd_cb_done_class, TP_PROTO( const stateid_t *stp, @@ -1475,6 +1560,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +TRACE_EVENT(nfsd_cb_recall_any_done, + TP_PROTO( + const struct nfsd4_callback *cb, + const struct rpc_task *task + ), + TP_ARGS(cb, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + ), + TP_fast_assign( + __entry->status = task->tk_status; + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot; + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->status + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eccc6ce55..5d6a61d47 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -49,6 +49,69 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +/** + * nfserrno - Map Linux errnos to NFS errnos + * @errno: POSIX(-ish) error code to be mapped + * + * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If + * it's an error we don't expect, log it once and return nfserr_io. + */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, + { nfserr_dquot, -EDQUOT }, + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. @@ -1317,7 +1380,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; - host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9744b0411..dbdfef7ae 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 36c3340c1..510978e60 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -896,5 +896,10 @@ struct nfsd4_operation { union nfsd4_op_u *); }; +struct nfsd4_cb_recall_any { + struct nfsd4_callback ra_cb; + u32 ra_keep; + u32 ra_bmval[1]; +}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 547cf07cf..0d39af1b0 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,3 +48,9 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + 1 + 1) +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901..1e7f653c1 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, @@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index a265d391f..822e8d95d 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - wait_for_stable_page(page); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b6579..a9b8d77c8 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct page *page) { struct buffer_head *bh_org; + size_t from = pos & ~PAGE_MASK; void *kaddr; bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); @@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, return -EIO; kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); kunmap_atomic(kaddr); brelse(bh_org); return 0; @@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, page); if (unlikely(err)) goto failed_page; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a4a147a98..0a8461396 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } break; } + set_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_begin_page_io(fs_page); fs_page = bh->b_page; @@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { @@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) } break; } + clear_buffer_async_write(bh); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); fs_page = bh->b_page; @@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) update_sr = true; break; } + set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, 0); fs_page = bh->b_page; diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 2215179c9..2618bf5a3 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1658,8 +1658,10 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1740,13 +1742,15 @@ ins_ext: ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 0c6a68e71..723e49ec8 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index d4d9f4ffb..72cdfa872 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } @@ -495,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -543,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -553,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -564,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index f31c0389a..14efe46df 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1110,6 +1110,8 @@ out: iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index bb7e33c24..d26026090 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -102,7 +102,7 @@ void ni_clear(struct ntfs_inode *ni) { struct rb_node *node; - if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) + if (!ni->vfs_inode.i_nlink && ni->mi.mrec && is_rec_inuse(ni->mi.mrec)) ni_delete_all(ni); al_destroy(ni); @@ -3255,6 +3255,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) return 0; } + if (!ni->mi.mrec) + goto out; + if (is_rec_inuse(ni->mi.mrec) && !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { bool modified = false; diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 710cb5aa5..d53ef128f 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 873b1434a..4c2d079b3 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -976,6 +976,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; @@ -1650,6 +1674,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) out: if (err) { + make_bad_inode(inode); iput(inode); ni = ERR_PTR(err); } @@ -1842,10 +1867,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sdh->type != ATTR_ZERO || + if(!(root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sdh->type != ATTR_ZERO || root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || - offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sdh->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } @@ -1861,10 +1888,12 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) goto out; } - root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); - if (root_sii->type != ATTR_ZERO || + if(!(root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT))) || + root_sii->type != ATTR_ZERO || root_sii->rule != NTFS_COLLATION_TYPE_UINT || - offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { + offsetof(struct INDEX_ROOT, ihdr) + + le32_to_cpu(root_sii->ihdr.used) > + le32_to_cpu(attr->res.data_size)) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index b89a33f57..eee01db6e 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -998,6 +998,7 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; + struct INDEX_ROOT *root = NULL; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); @@ -1007,7 +1008,15 @@ struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (attr) *attr = a; - return resident_data_ex(a, sizeof(struct INDEX_ROOT)); + root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); + + /* length check */ + if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > + le32_to_cpu(a->res.data_size)) { + return NULL; + } + + return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, @@ -1097,7 +1106,8 @@ ok: } /* check for index header length */ - if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { + if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) > + bytes) { err = -EINVAL; goto out; } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index dc937089a..42dd9fdaf 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -402,7 +402,6 @@ end_enum: goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -417,6 +416,7 @@ end_enum: if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 0f38d5581..ba26a465b 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -517,12 +517,10 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8c9abaf13..0f9bec29f 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -467,7 +467,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, int al_update(struct ntfs_inode *ni, int sync); static inline size_t al_aligned(size_t size) { - return (size + 1023) & ~(size_t)1023; + return size_add(size, 1023) & ~(size_t)1023; } /* Globals from bitfunc.c */ @@ -580,6 +580,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1012,19 +1013,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index ba336c728..a8d4ed7bc 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -226,11 +226,6 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* overflow check */ - return NULL; - } - attr = Add2Ptr(attr, asize); off += asize; } @@ -253,8 +248,8 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if ((t32 & 0xf) || (t32 > 0x100)) return NULL; - /* Check boundary. */ - if (off + asize > used) + /* Check overflow and boundary. */ + if (off + asize < off || off + asize > used) return NULL; /* Check size of attribute. */ @@ -491,8 +486,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index df15e00c2..d98cf7b38 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -217,6 +217,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b810..d210b2f8b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -467,13 +467,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, int permitted; struct mm_struct *mm; unsigned long long start_time; - unsigned long cmin_flt = 0, cmaj_flt = 0; - unsigned long min_flt = 0, maj_flt = 0; - u64 cutime, cstime, utime, stime; - u64 cgtime, gtime; + unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; + u64 cutime, cstime, cgtime, utime, stime, gtime; unsigned long rsslim = 0; unsigned long flags; int exit_code = task->exit_code; + struct signal_struct *sig = task->signal; + unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -501,12 +501,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = 0; - cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - if (sig->tty) { struct pid *pgrp = tty_get_pgrp(sig->tty); tty_pgrp = pid_nr_ns(pgrp, ns); @@ -517,27 +513,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, num_threads = get_nr_threads(task); collect_sigign_sigcatch(task, &sigign, &sigcatch); - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); - /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t = task; - do { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); - } while_each_thread(task, t); - - min_flt += sig->min_flt; - maj_flt += sig->maj_flt; - thread_group_cputime_adjusted(task, &utime, &stime); - gtime += sig->gtime; - if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) exit_code = sig->group_exit_code; } @@ -551,10 +529,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - if (!whole) { + + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + rcu_read_lock(); + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } + rcu_read_unlock(); + } + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { + task_cputime_adjusted(task, &utime, &stime); min_flt = task->min_flt; maj_flt = task->maj_flt; - task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index f3fa3625d..e15b46313 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -519,6 +519,7 @@ static int ramoops_init_przs(const char *name, } zone_sz = mem_sz / *cnt; + zone_sz = ALIGN_DOWN(zone_sz, 2); if (!zone_sz) { dev_err(dev, "%s zone size == 0\n", name); goto fail; diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5a132c1e6..fd082151c 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -149,7 +149,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) @@ -268,10 +268,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) goto oshr_free; - smb2_parse_contexts(server, o_rsp, + rc = smb2_parse_contexts(server, rsp_iov, &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) + goto oshr_free; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) goto oshr_free; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index d0ac2648c..d3d4cf632 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -444,7 +444,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7f1aea4c1..58bb54994 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -86,7 +86,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index f4818599c..4d5302b58 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -209,7 +209,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 2d75ba5aa..5990bdbae 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -304,14 +304,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -425,7 +427,7 @@ ffirst_retry: } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -1019,10 +1021,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 634035bcb..b8e14bcd2 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -248,6 +248,8 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 5a157000b..34d126200 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -613,7 +613,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e65f998ea..4c1231496 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2145,17 +2145,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2164,45 +2165,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen > doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3082,8 +3101,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); @@ -4991,6 +5010,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5060,6 +5082,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index be21b5d26..b325fde01 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -249,11 +249,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8a1dd8407..df44acaec 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -427,10 +427,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); @@ -449,8 +456,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 4cfa45c27..66d25d0e3 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6171,8 +6171,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, @@ -6382,8 +6384,10 @@ int smb2_read(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } ksmbd_fd_put(work, fp); return 0; diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 63cd50840..8d5f4a5a7 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -349,7 +349,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -577,6 +582,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); append = sync; } @@ -596,20 +609,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EBUSY; } - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 270ded209..f6b701261 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -245,16 +245,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -269,8 +271,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -291,7 +293,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -351,8 +353,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -366,23 +366,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -393,12 +395,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index 1a3ad6d29..c32e0cf23 100644 --- a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -35,6 +35,7 @@ int __init numa_add_memblk(int nodeid, u64 start, u64 end); void __init numa_set_distance(int from, int to, int distance); void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); +int __init early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -46,6 +47,7 @@ static inline void numa_add_cpu(unsigned int cpu) { } static inline void numa_remove_cpu(unsigned int cpu) { } static inline void arch_numa_init(void) { } static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { } +static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 699650f81..a84c64e5f 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -104,9 +104,9 @@ static inline u32 get_unaligned_le24(const void *p) static inline void __put_unaligned_be24(const u32 val, u8 *p) { - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be24(const u32 val, void *p) @@ -116,9 +116,9 @@ static inline void put_unaligned_be24(const u32 val, void *p) static inline void __put_unaligned_le24(const u32 val, u8 *p) { - *p++ = val; - *p++ = val >> 8; - *p++ = val >> 16; + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; } static inline void put_unaligned_le24(const u32 val, void *p) @@ -128,12 +128,12 @@ static inline void put_unaligned_le24(const u32 val, void *p) static inline void __put_unaligned_be48(const u64 val, u8 *p) { - *p++ = val >> 40; - *p++ = val >> 32; - *p++ = val >> 24; - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be48(const u64 val, void *p) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7ad6f51b3..1d1f480a5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -351,7 +351,6 @@ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ - MEM_KEEP(exit.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -546,7 +545,6 @@ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ MEM_KEEP(init.rodata) \ - MEM_KEEP(exit.rodata) \ } \ \ /* Built-in module parameters. */ \ @@ -601,7 +599,6 @@ *(.ref.text) \ *(.text.asan.* .text.tsan.*) \ MEM_KEEP(init.text*) \ - MEM_KEEP(exit.text*) \ /* sched.text is aling to function alignment to secure we have same @@ -751,13 +748,10 @@ *(.exit.data .exit.data.*) \ *(.fini_array .fini_array.*) \ *(.dtors .dtors.*) \ - MEM_DISCARD(exit.data*) \ - MEM_DISCARD(exit.rodata*) #define EXIT_TEXT \ *(.exit.text) \ *(.text.exit) \ - MEM_DISCARD(exit.text) #define EXIT_CALL \ *(.exitcall.exit) diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 81c298488..6b5eec10c 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -24,6 +24,7 @@ #define __DRM_COLOR_MGMT_H__ #include <linux/ctype.h> +#include <linux/math64.h> #include <drm/drm_property.h> struct drm_crtc; diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 9054a5185..311719149 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -168,6 +168,7 @@ struct mipi_dsi_device_info { * struct mipi_dsi_device - DSI peripheral device * @host: DSI host for this peripheral * @dev: driver model device node for this peripheral + * @attached: the DSI device has been successfully attached * @name: DSI peripheral chip type * @channel: virtual channel assigned to the peripheral * @format: pixel format for video mode @@ -184,6 +185,7 @@ struct mipi_dsi_device_info { struct mipi_dsi_device { struct mipi_dsi_host *host; struct device dev; + bool attached; char name[DSI_DEV_NAME_SIZE]; unsigned int channel; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c04a61ffa..1ca1902af 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2739,10 +2739,18 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 + +struct bpf_bprintf_data { + u32 *bin_args; + char *buf; + bool get_bin_args; + bool get_buf; +}; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); -void bpf_bprintf_cleanup(void); + u32 num_args, struct bpf_bprintf_data *data); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 9e3dac51e..d4dbaae8b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -59,7 +59,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648d..fcc95bff7 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,7 +25,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h new file mode 100644 index 000000000..53f1a7a93 --- /dev/null +++ b/include/linux/cleanup.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GUARDS_H +#define __LINUX_GUARDS_H + +#include <linux/compiler.h> + +/* + * DEFINE_FREE(name, type, free): + * simple helper macro that defines the required wrapper for a __free() + * based cleanup function. @free is an expression using '_T' to access + * the variable. + * + * __free(name): + * variable attribute to add a scoped based cleanup to the variable. + * + * no_free_ptr(var): + * like a non-atomic xchg(var, NULL), such that the cleanup function will + * be inhibited -- provided it sanely deals with a NULL value. + * + * return_ptr(p): + * returns p while inhibiting the __free(). + * + * Ex. + * + * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + * + * struct obj *p __free(kfree) = kmalloc(...); + * if (!p) + * return NULL; + * + * if (!init_obj(p)) + * return NULL; + * + * return_ptr(p); + */ + +#define DEFINE_FREE(_name, _type, _free) \ + static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; } + +#define __free(_name) __cleanup(__free_##_name) + +#define no_free_ptr(p) \ + ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + +#define return_ptr(p) return no_free_ptr(p) + + +/* + * DEFINE_CLASS(name, type, exit, init, init_args...): + * helper to define the destructor and constructor for a type. + * @exit is an expression using '_T' -- similar to FREE above. + * @init is an expression in @init_args resulting in @type + * + * EXTEND_CLASS(name, ext, init, init_args...): + * extends class @name to @name@ext with the new constructor + * + * CLASS(name, var)(args...): + * declare the variable @var as an instance of the named class + * + * Ex. + * + * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) + * + * CLASS(fdget, f)(fd); + * if (!f.file) + * return -EBADF; + * + * // use 'f' without concern + */ + +#define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \ +typedef _type class_##_name##_t; \ +static inline void class_##_name##_destructor(_type *p) \ +{ _type _T = *p; _exit; } \ +static inline _type class_##_name##_constructor(_init_args) \ +{ _type t = _init; return t; } + +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ +typedef class_##_name##_t class_##_name##ext##_t; \ +static inline void class_##_name##ext##_destructor(class_##_name##_t *p)\ +{ class_##_name##_destructor(p); } \ +static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ +{ class_##_name##_t t = _init; return t; } + +#define CLASS(_name, var) \ + class_##_name##_t var __cleanup(class_##_name##_destructor) = \ + class_##_name##_constructor + + +/* + * DEFINE_GUARD(name, type, lock, unlock): + * trivial wrapper around DEFINE_CLASS() above specifically + * for locks. + * + * guard(name): + * an anonymous instance of the (guard) class + * + * scoped_guard (name, args...) { }: + * similar to CLASS(name, scope)(args), except the variable (with the + * explicit name 'scope') is declard in a for-loop such that its scope is + * bound to the next (compound) statement. + * + */ + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + +#define guard(_name) \ + CLASS(_name, __UNIQUE_ID(guard)) + +#define scoped_guard(_name, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) + +/* + * Additional helper macros for generating lock guards with types, either for + * locks that don't have a native type (eg. RCU, preempt) or those that need a + * 'fat' pointer (eg. spin_lock_irqsave). + * + * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * + * will result in the following type: + * + * typedef struct { + * type *lock; // 'type := void' for the _0 variant + * __VA_ARGS__; + * } class_##name##_t; + * + * As above, both _lock and _unlock are statements, except this time '_T' will + * be a pointer to the above struct. + */ + +#define __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, ...) \ +typedef struct { \ + _type *lock; \ + __VA_ARGS__; \ +} class_##_name##_t; \ + \ +static inline void class_##_name##_destructor(class_##_name##_t *_T) \ +{ \ + if (_T->lock) { _unlock; } \ +} + + +#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(_type *l) \ +{ \ + class_##_name##_t _t = { .lock = l }, *_T = &_t; \ + _lock; \ + return _t; \ +} + +#define __DEFINE_LOCK_GUARD_0(_name, _lock) \ +static inline class_##_name##_t class_##_name##_constructor(void) \ +{ \ + class_##_name##_t _t = { .lock = (void*)1 }, \ + *_T __maybe_unused = &_t; \ + _lock; \ + return _t; \ +} + +#define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_1(_name, _type, _lock) + +#define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ +__DEFINE_LOCK_GUARD_0(_name, _lock) + +#endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 6cfd6902b..9b673fefc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -5,6 +5,15 @@ /* Compiler specific definitions for Clang compiler */ +/* + * Clang prior to 17 is being silly and considers many __cleanup() variables + * as unused (because they are, their sole purpose is to go out of scope). + * + * https://reviews.llvm.org/D152180 + */ +#undef __cleanup +#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) + /* same as gcc, this was present in clang-2.6 so we can assume it works * with any version that can compile the kernel */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efd..149a52051 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,26 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + * + * Work around it via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) +#endif + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b..ae4c9579c 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -76,6 +76,12 @@ #endif /* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-cleanup-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#cleanup + */ +#define __cleanup(func) __attribute__((__cleanup__(func))) + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236..574b4121e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -284,8 +284,15 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ +#ifndef asm_goto_output +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 008bfa68c..4b06b1f1e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,6 +74,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 9192986b1..ac862422d 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size) if (!malloc_ptr) malloc_ptr = free_mem_ptr; - malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */ p = (void *)malloc_ptr; malloc_ptr += size; diff --git a/include/linux/device.h b/include/linux/device.h index 7cf24330d..5520bb546 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -30,6 +30,7 @@ #include <linux/device/bus.h> #include <linux/device/class.h> #include <linux/device/driver.h> +#include <linux/cleanup.h> #include <asm/device.h> struct device; @@ -898,6 +899,9 @@ void device_unregister(struct device *dev); void device_initialize(struct device *dev); int __must_check device_add(struct device *dev); void device_del(struct device *dev); + +DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) + int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, @@ -1071,6 +1075,9 @@ extern int (*platform_notify_remove)(struct device *dev); */ struct device *get_device(struct device *dev); void put_device(struct device *dev); + +DEFINE_FREE(put_device, struct device *, if (_T) put_device(_T)) + bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60..3576c6e89 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -954,7 +954,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( diff --git a/include/linux/efi.h b/include/linux/efi.h index 4e1bfee96..de6d6558a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) diff --git a/include/linux/file.h b/include/linux/file.h index 39704eae8..6e9099d29 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/posix_types.h> #include <linux/errno.h> +#include <linux/cleanup.h> struct file; @@ -80,6 +81,8 @@ static inline void fdput_pos(struct fd f) fdput(f); } +DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); @@ -88,6 +91,9 @@ extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); +DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), + get_unused_fd_flags(flags), unsigned flags) + extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(struct file *file, int __user *ufd, diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a1911dcf..092d8fa10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -337,6 +337,8 @@ enum rw_hint { #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) struct kiocb { struct file *ki_filp; @@ -1187,6 +1189,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1332,6 +1341,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f2044d5a6..254d4a898 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -197,6 +197,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -219,7 +220,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a..719cf9cc6 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include <linux/iio/iio.h> + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index db4a1b260..c34e648f0 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -261,9 +261,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bcbefb757..af083aa0c 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> +#include <linux/iio/iio.h> #include <linux/iio/types.h> #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; diff --git a/include/linux/init.h b/include/linux/init.h index 077d7f93b..c96aea322 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -87,9 +87,6 @@ __latent_entropy #define __meminitdata __section(".meminit.data") #define __meminitconst __section(".meminit.rodata") -#define __memexit __section(".memexit.text") __exitused __cold notrace -#define __memexitdata __section(".memexit.data") -#define __memexitconst __section(".memexit.rodata") /* For assembly routines */ #define __HEAD .section ".head.text","ax" diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 8cd11a223..136f2980c 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -66,6 +66,9 @@ void irq_work_sync(struct irq_work *work); void irq_work_run(void); bool irq_work_needs_cpu(void); void irq_work_single(void *arg); + +void arch_irq_work_raise(void); + #else static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa713..2b665c32f 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -13,6 +13,7 @@ #define _LINUX_TRACE_IRQFLAGS_H #include <linux/typecheck.h> +#include <linux/cleanup.h> #include <asm/irqflags.h> #include <asm/percpu.h> @@ -267,4 +268,10 @@ extern void warn_bogus_irq_restore(void); #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +DEFINE_LOCK_GUARD_0(irq, local_irq_disable(), local_irq_enable()) +DEFINE_LOCK_GUARD_0(irqsave, + local_irq_save(_T->flags), + local_irq_restore(_T->flags), + unsigned long flags) + #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 50ad19662..6790f0806 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 74f9d9a6d..0e4ef9c51 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -102,6 +102,7 @@ #define SDIO_DEVICE_ID_MARVELL_8977_BT 0x9146 #define SDIO_DEVICE_ID_MARVELL_8987_WLAN 0x9149 #define SDIO_DEVICE_ID_MARVELL_8987_BT 0x914a +#define SDIO_DEVICE_ID_MARVELL_8978_WLAN 0x9159 #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 638d2124d..3b9f4d7c4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1854,9 +1854,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -1864,7 +1864,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8f226d460..a33aa9eb9 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -19,6 +19,7 @@ #include <asm/processor.h> #include <linux/osq_lock.h> #include <linux/debug_locks.h> +#include <linux/cleanup.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -219,4 +220,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) +DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bef8db9d6..e5f4b6f8d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -437,11 +437,13 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #include <linux/netfilter/nf_conntrack_zones_common.h> void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) @@ -459,6 +461,8 @@ struct nf_ct_hook { bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); + void (*set_closing)(struct nf_conntrack *nfct); + int (*confirm)(struct sk_buff *skb); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 72f5ebc5c..0b217d4ae 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a59..730003c4f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b76ff0850..73cc1e7dd 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3000,6 +3000,7 @@ #define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0 #define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2 #define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_HDA_ARL 0x7728 #define PCI_DEVICE_ID_INTEL_SCH_LPC 0x8119 #define PCI_DEVICE_ID_INTEL_SCH_IDE 0x811a #define PCI_DEVICE_ID_INTEL_E6XX_CU 0x8183 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad13..ba00a4936 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,6 +8,7 @@ #include <linux/cpumask.h> #include <linux/pfn.h> #include <linux/init.h> +#include <linux/cleanup.h> #include <asm/percpu.h> @@ -128,6 +129,9 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); + +DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu_gfp(type, gfp) \ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8cfcc5d45..9aa6358a1 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -8,6 +8,7 @@ */ #include <linux/linkage.h> +#include <linux/cleanup.h> #include <linux/list.h> /* @@ -474,4 +475,8 @@ static __always_inline void preempt_enable_nested(void) preempt_enable(); } +DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) +DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 46bd9a331..d2507168b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -27,6 +27,7 @@ #include <linux/preempt.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> +#include <linux/cleanup.h> #include <asm/processor.h> #include <linux/cpumask.h> #include <linux/context_tracking_irq.h> @@ -1077,4 +1078,6 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; +DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index efa5c3243..1dd530ce8 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/err.h> +#include <linux/cleanup.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ @@ -201,6 +202,13 @@ extern void up_read(struct rw_semaphore *sem); */ extern void up_write(struct rw_semaphore *sem); +DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) + +DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) +DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) + + /* * downgrade write lock to read lock */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 7291fb639..aaa25ed1a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -145,6 +145,8 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T)) + static inline void put_task_struct_many(struct task_struct *t, int nr) { if (refcount_sub_and_test(nr, &t->usage)) diff --git a/include/linux/slab.h b/include/linux/slab.h index 45efc6c55..cb4b5deca 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/workqueue.h> #include <linux/percpu-refcount.h> +#include <linux/cleanup.h> /* @@ -197,6 +198,8 @@ void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); +DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) + /** * ksize - Report actual allocation size of associated object * diff --git a/include/linux/socket.h b/include/linux/socket.h index b3c58042b..d79efd026 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1341f7d62..83377540c 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -61,6 +61,7 @@ #include <linux/stringify.h> #include <linux/bottom_half.h> #include <linux/lockdep.h> +#include <linux/cleanup.h> #include <asm/barrier.h> #include <asm/mmiowb.h> @@ -493,5 +494,35 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, + raw_spin_lock(_T->lock), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, + raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), + raw_spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, + raw_spin_lock_irq(_T->lock), + raw_spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, + raw_spin_lock_irqsave(_T->lock, _T->flags), + raw_spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, + spin_lock(_T->lock), + spin_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, + spin_lock_irq(_T->lock), + spin_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, + spin_lock_irqsave(_T->lock, _T->flags), + spin_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d9..f9e1fa7ff 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -212,4 +212,9 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, + _T->idx = srcu_read_lock(_T->lock), + srcu_read_unlock(_T->lock, _T->idx), + int idx) + #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c..add47f43e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -571,6 +571,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 43ac3fa76..9783b9107 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -412,6 +412,8 @@ extern int composite_dev_prepare(struct usb_composite_driver *composite, extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, struct usb_ep *ep0); void composite_dev_cleanup(struct usb_composite_dev *cdev); +void check_remote_wakeup_config(struct usb_gadget *g, + struct usb_configuration *c); static inline struct usb_composite_driver *to_cdriver( struct usb_gadget_driver *gdrv) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea..5bec668b4 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -309,6 +309,7 @@ struct usb_udc; struct usb_gadget_ops { int (*get_frame)(struct usb_gadget *); int (*wakeup)(struct usb_gadget *); + int (*set_remote_wakeup)(struct usb_gadget *, int set); int (*set_selfpowered) (struct usb_gadget *, int is_selfpowered); int (*vbus_session) (struct usb_gadget *, int is_active); int (*vbus_draw) (struct usb_gadget *, unsigned mA); @@ -383,6 +384,8 @@ struct usb_gadget_ops { * @connected: True if gadget is connected. * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag * indicates that it supports LPM as per the LPM ECN & errata. + * @wakeup_capable: True if gadget is capable of sending remote wakeup. + * @wakeup_armed: True if gadget is armed by the host for remote wakeup. * @irq: the interrupt number for device controller. * @id_number: a unique ID number for ensuring that gadget names are distinct * @@ -444,6 +447,8 @@ struct usb_gadget { unsigned deactivated:1; unsigned connected:1; unsigned lpm_capable:1; + unsigned wakeup_capable:1; + unsigned wakeup_armed:1; int irq; int id_number; }; @@ -600,6 +605,7 @@ static inline int gadget_is_otg(struct usb_gadget *g) #if IS_ENABLED(CONFIG_USB_GADGET) int usb_gadget_frame_number(struct usb_gadget *gadget); int usb_gadget_wakeup(struct usb_gadget *gadget); +int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set); int usb_gadget_set_selfpowered(struct usb_gadget *gadget); int usb_gadget_clear_selfpowered(struct usb_gadget *gadget); int usb_gadget_vbus_connect(struct usb_gadget *gadget); @@ -615,6 +621,8 @@ static inline int usb_gadget_frame_number(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_wakeup(struct usb_gadget *gadget) { return 0; } +static inline int usb_gadget_set_remote_wakeup(struct usb_gadget *gadget, int set) +{ return 0; } static inline int usb_gadget_set_selfpowered(struct usb_gadget *gadget) { return 0; } static inline int usb_gadget_clear_selfpowered(struct usb_gadget *gadget) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 55ca217c6..0920b669b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -45,12 +45,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -79,6 +73,20 @@ static inline struct unix_sock *unix_sk(const struct sock *sk) } #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index c83c09c65..4f11f7df7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -752,7 +752,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4..21da31e1d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/mctp.h b/include/net/mctp.h index 82800d521..7ed84054f 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -249,6 +249,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa..3dbf94728 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -126,6 +126,12 @@ struct nf_conn { }; static inline struct nf_conn * +nf_ct_to_nf_conn(const struct nf_conntrack *nfct) +{ + return container_of(nfct, struct nf_conn, ct_general); +} + +static inline struct nf_conn * nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) { return container_of(hash, struct nf_conn, @@ -175,6 +181,8 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) void nf_ct_destroy(struct nf_conntrack *nfct); +void nf_conntrack_tcp_set_closing(struct nf_conn *ct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index dde4dd9c4..4a767b3d2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -274,8 +274,8 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, flow_table->type->put(flow_table); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); +void flow_offload_route_init(struct flow_offload *flow, + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c726da3b7..2fa344cb6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1286,6 +1286,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1295,6 +1296,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0..a3d8f013a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -303,6 +303,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c838f729..8ea1fba84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2290,7 +2290,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/include/net/tls.h b/include/net/tls.h index c36bf4c50..899c863ab 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -108,9 +108,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -147,8 +144,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index fdc31fdb6..a64713fe5 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; @@ -208,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ @@ -483,28 +480,51 @@ extern const char *scsi_device_state_name(enum scsi_device_state); extern int scsi_is_sdev_device(const struct device *); extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); -extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, blk_opf_t flags, - req_flags_t rq_flags, int *resid); + +/* Optional arguments to scsi_execute_cmd */ +struct scsi_exec_args { + unsigned char *sense; /* sense buffer */ + unsigned int sense_len; /* sense buffer len */ + struct scsi_sense_hdr *sshdr; /* decoded sense header */ + blk_mq_req_flags_t req_flags; /* BLK_MQ_REQ flags */ + int *resid; /* residual length */ +}; + +int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd, + blk_opf_t opf, void *buffer, unsigned int bufflen, + int timeout, int retries, + const struct scsi_exec_args *args); + /* Make sure any sense buffer is the correct size. */ -#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ - sshdr, timeout, retries, flags, rq_flags, resid) \ +#define scsi_execute(_sdev, _cmd, _data_dir, _buffer, _bufflen, _sense, \ + _sshdr, _timeout, _retries, _flags, _rq_flags, \ + _resid) \ ({ \ - BUILD_BUG_ON((sense) != NULL && \ - sizeof(sense) != SCSI_SENSE_BUFFERSIZE); \ - __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, \ - sense, sshdr, timeout, retries, flags, rq_flags, \ - resid); \ + scsi_execute_cmd(_sdev, _cmd, (_data_dir == DMA_TO_DEVICE ? \ + REQ_OP_DRV_OUT : REQ_OP_DRV_IN) | _flags, \ + _buffer, _bufflen, _timeout, _retries, \ + &(struct scsi_exec_args) { \ + .sense = _sense, \ + .sshdr = _sshdr, \ + .req_flags = _rq_flags & RQF_PM ? \ + BLK_MQ_REQ_PM : 0, \ + .resid = _resid, \ + }); \ }) + static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute(sdev, cmd, data_direction, buffer, - bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); + return scsi_execute_cmd(sdev, cmd, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, buffer, + bufflen, timeout, retries, + &(struct scsi_exec_args) { + .sshdr = sshdr, + .resid = resid, + }); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925c..1f4258308 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index c9048f3e4..3f121eed3 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -13,7 +13,7 @@ #include <linux/tracepoint.h> -#include <trace/events/sunrpc_base.h> +#include <trace/misc/sunrpc.h> /** ** GSS-API related trace events diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index fcd3b3f10..8f461e04e 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -15,8 +15,8 @@ #include <linux/tracepoint.h> #include <rdma/ib_cm.h> -#include <trace/events/rdma.h> -#include <trace/events/sunrpc_base.h> +#include <trace/misc/rdma.h> +#include <trace/misc/sunrpc.h> /** ** Event classes diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f48f2ab9d..ffe2679a1 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,7 +14,7 @@ #include <linux/net.h> #include <linux/tracepoint.h> -#include <trace/events/sunrpc_base.h> +#include <trace/misc/sunrpc.h> TRACE_DEFINE_ENUM(SOCK_STREAM); TRACE_DEFINE_ENUM(SOCK_DGRAM); diff --git a/include/trace/events/fs.h b/include/trace/misc/fs.h index 738b97f22..738b97f22 100644 --- a/include/trace/events/fs.h +++ b/include/trace/misc/fs.h diff --git a/include/trace/events/nfs.h b/include/trace/misc/nfs.h index 09ffdbb04..0d9d48dca 100644 --- a/include/trace/events/nfs.h +++ b/include/trace/misc/nfs.h @@ -360,6 +360,18 @@ TRACE_DEFINE_ENUM(IOMODE_ANY); { IOMODE_RW, "RW" }, \ { IOMODE_ANY, "ANY" }) +#define show_rca_mask(x) \ + __print_flags(x, "|", \ + { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \ + { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \ + { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \ + { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" }) + #define show_nfs4_seq4_status(x) \ __print_flags(x, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h index 81bb454fc..81bb454fc 100644 --- a/include/trace/events/rdma.h +++ b/include/trace/misc/rdma.h diff --git a/include/trace/events/sunrpc_base.h b/include/trace/misc/sunrpc.h index 588557d07..588557d07 100644 --- a/include/trace/events/sunrpc_base.h +++ b/include/trace/misc/sunrpc.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 201dc77eb..d5d218373 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -3117,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6687,6 +6696,8 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6699,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index c4c53a9ab..ff8d21f9e 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -145,7 +145,7 @@ struct in6_flowlabel_req { #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* TEMPORARY IANA allocation for IOAM */ +#define IPV6_TLV_IOAM 49 /* RFC 9486 */ #define IPV6_TLV_JUMBO 194 #define IPV6_TLV_HAO 201 /* home address option */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index af8f4c304..707af820f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -266,9 +266,11 @@ enum nft_rule_attributes { /** * enum nft_rule_compat_flags - nf_tables rule compat flags * + * @NFT_RULE_COMPAT_F_UNUSED: unused * @NFT_RULE_COMPAT_F_INV: invert the check result */ enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_UNUSED = (1 << 0), NFT_RULE_COMPAT_F_INV = (1 << 1), NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, }; diff --git a/init/Kconfig b/init/Kconfig index 148704640..ffb927bf6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) diff --git a/io_uring/net.c b/io_uring/net.c index 67f09a40b..c062ce66a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -875,6 +875,7 @@ retry_multishot: if (!buf) return -ENOBUFS; sr->buf = buf; + sr->len = len; } ret = import_single_range(ITER_DEST, sr->buf, len, &iov, &msg.msg_iter); @@ -1325,7 +1326,7 @@ retry: * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1349,7 +1350,8 @@ retry: if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/kernel/audit.c b/kernel/audit.c index 9bc0b0301..99127521c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -488,15 +488,19 @@ static void auditd_conn_free(struct rcu_head *rcu) * @pid: auditd PID * @portid: auditd netlink portid * @net: auditd network namespace pointer + * @skb: the netlink command from the audit daemon + * @ack: netlink ack flag, cleared if ack'd here * * Description: * This function will obtain and drop network namespace references as * necessary. Returns zero on success, negative values on failure. */ -static int auditd_set(struct pid *pid, u32 portid, struct net *net) +static int auditd_set(struct pid *pid, u32 portid, struct net *net, + struct sk_buff *skb, bool *ack) { unsigned long flags; struct auditd_connection *ac_old, *ac_new; + struct nlmsghdr *nlh; if (!pid || !net) return -EINVAL; @@ -508,6 +512,13 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net) ac_new->portid = portid; ac_new->net = get_net(net); + /* send the ack now to avoid a race with the queue backlog */ + if (*ack) { + nlh = nlmsg_hdr(skb); + netlink_ack(skb, nlh, 0, NULL); + *ack = false; + } + spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); @@ -1201,7 +1212,8 @@ static int audit_replace(struct pid *pid) return auditd_send_unicast_skb(skb); } -static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + bool *ack) { u32 seq; void *data; @@ -1294,7 +1306,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* register a new auditd connection */ err = auditd_set(req_pid, NETLINK_CB(skb).portid, - sock_net(NETLINK_CB(skb).sk)); + sock_net(NETLINK_CB(skb).sk), + skb, ack); if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, @@ -1539,9 +1552,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) * Parse the provided skb and deal with any messages that may be present, * malformed skbs are discarded. */ -static void audit_receive(struct sk_buff *skb) +static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; + bool ack; /* * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned @@ -1554,9 +1568,12 @@ static void audit_receive(struct sk_buff *skb) audit_ctl_lock(); while (nlmsg_ok(nlh, len)) { - err = audit_receive_msg(skb, nlh); - /* if err or if this message says it wants a response */ - if (err || (nlh->nlmsg_flags & NLM_F_ACK)) + ack = nlh->nlmsg_flags & NLM_F_ACK; + err = audit_receive_msg(skb, nlh, &ack); + + /* send an ack if the user asked for one and audit_receive_msg + * didn't already do it, or if there was an error. */ + if (ack || err) netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 08a8e8102..050893704 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -222,7 +222,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6212e4ae0..83f8f67e9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -28,12 +28,13 @@ * * Different map implementations will rely on rcu in map methods * lookup/update/delete, therefore eBPF programs must run under rcu lock - * if program is allowed to access maps, so check rcu_read_lock_held in - * all three functions. + * if program is allowed to access maps, so check rcu_read_lock_held() or + * rcu_read_lock_trace_held() in all three functions. */ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return (unsigned long) map->ops->map_lookup_elem(map, key); } @@ -49,7 +50,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = { BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, void *, value, u64, flags) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return map->ops->map_update_elem(map, key, value, flags); } @@ -66,7 +68,8 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && + !rcu_read_lock_bh_held()); return map->ops->map_delete_elem(map, key); } @@ -750,19 +753,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. */ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -772,18 +776,19 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } -void bpf_bprintf_cleanup(void) +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (this_cpu_read(bpf_bprintf_nest_level)) { - this_cpu_dec(bpf_bprintf_nest_level); - preempt_enable(); - } + if (!data->bin_args && !data->buf) + return; + if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) + return; + this_cpu_dec(bpf_bprintf_nest_level); + preempt_enable(); } /* @@ -792,18 +797,20 @@ void bpf_bprintf_cleanup(void) * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: - * - Format string verification only: when bin_args is NULL + * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in - * bin_args a binary representation of arguments usable by bstr_printf where - * pointers from BPF have been sanitized. + * data->bin_args a binary representation of arguments usable by bstr_printf + * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 num_args, struct bpf_bprintf_data *data) { + bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; @@ -814,14 +821,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; - *bin_args = (u32 *)tmp_buf; + if (data->get_bin_args) { + if (num_args) + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; + data->bin_args = (u32 *)tmp_buf; } + if (data->get_buf) + data->buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; @@ -1015,31 +1027,33 @@ nocopy_fmt: err = 0; out: if (err) - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(data); return err; } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; - err = bstr_printf(str, str_size, fmt, bin_args); + err = bstr_printf(str, str_size, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return err + 1; } @@ -1077,6 +1091,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1298,6 +1313,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1319,6 +1335,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1373,7 +1390,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8d0c38a8d..1e46a8469 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1598,6 +1598,9 @@ int generic_map_delete_batch(struct bpf_map *map, if (!max_count) return 0; + if (put_user(0, &uattr->batch.count)) + return -EFAULT; + key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); if (!key) return -ENOMEM; @@ -1657,6 +1660,9 @@ int generic_map_update_batch(struct bpf_map *map, if (!max_count) return 0; + if (put_user(0, &uattr->batch.count)) + return -EFAULT; + key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); if (!key) return -ENOMEM; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 23b6d57b5..1a29ac4db 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7448,6 +7448,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7472,7 +7473,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); diff --git a/kernel/events/core.c b/kernel/events/core.c index 8c7d2f4f5..872d149b1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11223,9 +11223,30 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms); static struct attribute *pmu_dev_attrs[] = { &dev_attr_type.attr, &dev_attr_perf_event_mux_interval_ms.attr, + &dev_attr_nr_addr_filters.attr, + NULL, +}; + +static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + + if (n == 2 && !pmu->nr_addr_filters) + return 0; + + return a->mode; +} + +static struct attribute_group pmu_dev_attr_group = { + .is_visible = pmu_dev_is_visible, + .attrs = pmu_dev_attrs, +}; + +static const struct attribute_group *pmu_dev_groups[] = { + &pmu_dev_attr_group, NULL, }; -ATTRIBUTE_GROUPS(pmu_dev); static int pmu_bus_running; static struct bus_type pmu_bus = { @@ -11261,18 +11282,11 @@ static int pmu_dev_alloc(struct pmu *pmu) if (ret) goto free_dev; - /* For PMUs with address filters, throw in an extra attribute: */ - if (pmu->nr_addr_filters) - ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); - - if (ret) - goto del_dev; - - if (pmu->attr_update) + if (pmu->attr_update) { ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); - - if (ret) - goto del_dev; + if (ret) + goto del_dev; + } out: return ret; diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 0c5be7ebb..08b16d20c 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -161,6 +161,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -258,6 +261,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -346,6 +350,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -459,6 +464,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 76bafa8d3..3a2335bc1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -37,6 +37,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rt_runtime_us", @@ -44,6 +46,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_NEG_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rr_timeslice_ms", @@ -2970,9 +2974,6 @@ static int sched_rt_global_constraints(void) #ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { - if (sysctl_sched_rt_period <= 0) - return -EINVAL; - if ((sysctl_sched_rt_runtime != RUNTIME_INF) && ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || ((u64)sysctl_sched_rt_runtime * @@ -3003,7 +3004,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_validate(); @@ -3047,6 +3048,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, sched_rr_timeslice = sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE : msecs_to_jiffies(sysctl_sched_rr_timeslice); + + if (sysctl_sched_rr_timeslice <= 0) + sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE); } mutex_unlock(&mutex); diff --git a/kernel/sys.c b/kernel/sys.c index c85e1abf7..d06eda138 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1778,74 +1778,87 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; + struct signal_struct *sig = p->signal; + unsigned int seq = 0; - memset((char *)r, 0, sizeof (*r)); +retry: + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; - goto out; + maxrss = sig->maxrss; + goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; - r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; - t = p; - do { + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; + + rcu_read_lock(); + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); -out: - r->ru_utime = ns_to_kernel_old_timeval(utime); - r->ru_stime = ns_to_kernel_old_timeval(stime); + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); + if (who == RUSAGE_CHILDREN) + goto out_children; - if (mm) { - setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; + +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); } + +out_children: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ + r->ru_utime = ns_to_kernel_old_timeval(utime); + r->ru_stime = ns_to_kernel_old_timeval(stime); } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c90e710d..cc6db3bce 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -126,6 +126,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -144,6 +145,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating); * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) static void clocksource_watchdog_work(struct work_struct *work) { @@ -396,8 +398,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -467,6 +469,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5561dabc9..9bb88836c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1082,6 +1082,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, enum hrtimer_mode mode) { debug_activate(timer, mode); + WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; @@ -2180,6 +2181,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2247,6 +2249,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); + old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; @@ -2263,7 +2266,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2290,6 +2293,13 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } + /* + * Override any slack passed by the user if under + * rt contraints. + */ + if (rt_task(current)) + delta = 0; + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2309,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has @@ -2317,7 +2327,8 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. + * actual wakeup to a time that is both power and performance friendly + * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f4a494a45..3fdde232e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -368,8 +368,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -377,23 +375,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -426,30 +423,29 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -471,23 +467,25 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1285e7fb5..e019a9278 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1095,7 +1095,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2b3c4cd83..f667d6bdd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,6 +40,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/panic_notifier.h> +#include <linux/kmemleak.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> @@ -2249,7 +2250,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2263,47 +2264,60 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) { + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + kmemleak_free(s); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) +{ + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; - - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } - - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -5972,26 +5986,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 918730d74..f941ce01e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1455,8 +1455,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } diff --git a/lib/debugobjects.c b/lib/debugobjects.c index dacb80c22..5dfa582db 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -609,9 +609,8 @@ static void debug_objects_fill_pool(void) static void __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; debug_objects_fill_pool(); @@ -632,24 +631,18 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_INIT; - break; - - case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "init"); - debug_object_fixup(descr->fixup_init, addr, state); - return; - - case ODEBUG_STATE_DESTROYED: raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "init"); return; default: break; } + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); + debug_print_object(&o, "init"); + + if (o.state == ODEBUG_STATE_ACTIVE) + debug_object_fixup(descr->fixup_init, addr, o.state); } /** @@ -690,11 +683,9 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack); int debug_object_activate(void *addr, const struct debug_obj_descr *descr) { struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; - enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - int ret; if (!debug_objects_enabled) return 0; @@ -706,49 +697,38 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object_or_alloc(addr, db, descr, false, true); - if (likely(!IS_ERR_OR_NULL(obj))) { - bool print_object = false; - + if (unlikely(!obj)) { + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_objects_oom(); + return 0; + } else if (likely(!IS_ERR(obj))) { switch (obj->state) { - case ODEBUG_STATE_INIT: - case ODEBUG_STATE_INACTIVE: - obj->state = ODEBUG_STATE_ACTIVE; - ret = 0; - break; - case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "activate"); - ret = debug_object_fixup(descr->fixup_activate, addr, state); - return ret ? 0 : -EINVAL; - case ODEBUG_STATE_DESTROYED: - print_object = true; - ret = -EINVAL; + o = *obj; break; + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_ACTIVE; + fallthrough; default: - ret = 0; - break; + raw_spin_unlock_irqrestore(&db->lock, flags); + return 0; } - raw_spin_unlock_irqrestore(&db->lock, flags); - if (print_object) - debug_print_object(obj, "activate"); - return ret; } raw_spin_unlock_irqrestore(&db->lock, flags); + debug_print_object(&o, "activate"); - /* If NULL the allocation has hit OOM */ - if (!obj) { - debug_objects_oom(); - return 0; + switch (o.state) { + case ODEBUG_STATE_ACTIVE: + case ODEBUG_STATE_NOTAVAILABLE: + if (debug_object_fixup(descr->fixup_activate, addr, o.state)) + return 0; + fallthrough; + default: + return -EINVAL; } - - /* Object is neither static nor tracked. It's not initialized */ - debug_print_object(&o, "activate"); - ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE); - return ret ? 0 : -EINVAL; } EXPORT_SYMBOL_GPL(debug_object_activate); @@ -759,10 +739,10 @@ EXPORT_SYMBOL_GPL(debug_object_activate); */ void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -774,33 +754,24 @@ void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) obj = lookup_object(addr, db); if (obj) { switch (obj->state) { + case ODEBUG_STATE_DESTROYED: + break; case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - if (!obj->astate) - obj->state = ODEBUG_STATE_INACTIVE; - else - print_object = true; - break; - - case ODEBUG_STATE_DESTROYED: - print_object = true; - break; + if (obj->astate) + break; + obj->state = ODEBUG_STATE_INACTIVE; + fallthrough; default: - break; + raw_spin_unlock_irqrestore(&db->lock, flags); + return; } + o = *obj; } raw_spin_unlock_irqrestore(&db->lock, flags); - if (!obj) { - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; - - debug_print_object(&o, "deactivate"); - } else if (print_object) { - debug_print_object(obj, "deactivate"); - } + debug_print_object(&o, "deactivate"); } EXPORT_SYMBOL_GPL(debug_object_deactivate); @@ -811,11 +782,9 @@ EXPORT_SYMBOL_GPL(debug_object_deactivate); */ void debug_object_destroy(void *addr, const struct debug_obj_descr *descr) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -825,32 +794,31 @@ void debug_object_destroy(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); - if (!obj) - goto out_unlock; + if (!obj) { + raw_spin_unlock_irqrestore(&db->lock, flags); + return; + } switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + case ODEBUG_STATE_DESTROYED: + break; case ODEBUG_STATE_NONE: case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_DESTROYED; - break; - case ODEBUG_STATE_ACTIVE: - state = obj->state; + fallthrough; + default: raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "destroy"); - debug_object_fixup(descr->fixup_destroy, addr, state); return; - - case ODEBUG_STATE_DESTROYED: - print_object = true; - break; - default: - break; } -out_unlock: + + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); - if (print_object) - debug_print_object(obj, "destroy"); + debug_print_object(&o, "destroy"); + + if (o.state == ODEBUG_STATE_ACTIVE) + debug_object_fixup(descr->fixup_destroy, addr, o.state); } EXPORT_SYMBOL_GPL(debug_object_destroy); @@ -861,9 +829,8 @@ EXPORT_SYMBOL_GPL(debug_object_destroy); */ void debug_object_free(void *addr, const struct debug_obj_descr *descr) { - enum debug_obj_state state; + struct debug_obj *obj, o; struct debug_bucket *db; - struct debug_obj *obj; unsigned long flags; if (!debug_objects_enabled) @@ -874,24 +841,26 @@ void debug_object_free(void *addr, const struct debug_obj_descr *descr) raw_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); - if (!obj) - goto out_unlock; + if (!obj) { + raw_spin_unlock_irqrestore(&db->lock, flags); + return; + } switch (obj->state) { case ODEBUG_STATE_ACTIVE: - state = obj->state; - raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "free"); - debug_object_fixup(descr->fixup_free, addr, state); - return; + break; default: hlist_del(&obj->node); raw_spin_unlock_irqrestore(&db->lock, flags); free_object(obj); return; } -out_unlock: + + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); + debug_print_object(&o, "free"); + + debug_object_fixup(descr->fixup_free, addr, o.state); } EXPORT_SYMBOL_GPL(debug_object_free); @@ -943,10 +912,10 @@ void debug_object_active_state(void *addr, const struct debug_obj_descr *descr, unsigned int expect, unsigned int next) { + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; - bool print_object = false; if (!debug_objects_enabled) return; @@ -959,28 +928,19 @@ debug_object_active_state(void *addr, const struct debug_obj_descr *descr, if (obj) { switch (obj->state) { case ODEBUG_STATE_ACTIVE: - if (obj->astate == expect) - obj->astate = next; - else - print_object = true; - break; - + if (obj->astate != expect) + break; + obj->astate = next; + raw_spin_unlock_irqrestore(&db->lock, flags); + return; default: - print_object = true; break; } + o = *obj; } raw_spin_unlock_irqrestore(&db->lock, flags); - if (!obj) { - struct debug_obj o = { .object = addr, - .state = ODEBUG_STATE_NOTAVAILABLE, - .descr = descr }; - - debug_print_object(&o, "active_state"); - } else if (print_object) { - debug_print_object(obj, "active_state"); - } + debug_print_object(&o, "active_state"); } EXPORT_SYMBOL_GPL(debug_object_active_state); @@ -988,12 +948,10 @@ EXPORT_SYMBOL_GPL(debug_object_active_state); static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - const struct debug_obj_descr *descr; - enum debug_obj_state state; + int cnt, objs_checked = 0; + struct debug_obj *obj, o; struct debug_bucket *db; struct hlist_node *tmp; - struct debug_obj *obj; - int cnt, objs_checked = 0; saddr = (unsigned long) address; eaddr = saddr + size; @@ -1015,12 +973,10 @@ repeat: switch (obj->state) { case ODEBUG_STATE_ACTIVE: - descr = obj->descr; - state = obj->state; + o = *obj; raw_spin_unlock_irqrestore(&db->lock, flags); - debug_print_object(obj, "free"); - debug_object_fixup(descr->fixup_free, - (void *) oaddr, state); + debug_print_object(&o, "free"); + debug_object_fixup(o.descr->fixup_free, (void *)oaddr, o.state); goto repeat; default: hlist_del(&obj->node); diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index 40f5908e5..e16dca1e2 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); diff --git a/lib/nlattr.c b/lib/nlattr.c index dffd60e40..86344df0c 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -30,6 +30,8 @@ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { @@ -43,6 +45,8 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), + [NLA_BE16] = sizeof(__be16), + [NLA_BE32] = sizeof(__be32), }; /* diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 63bdad20d..98a678129 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 162c9b1ca..cc337e94a 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -141,9 +141,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; int err = 0; err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs); @@ -154,6 +165,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } damon_set_schemes(ctx, &scheme, 1); return damon_set_region_biggest_system_ram_default(target, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 595779467..9736e7621 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include <linux/page_owner.h> #include <linux/sched/sysctl.h> #include <linux/memory-tiers.h> +#include <linux/compat.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -607,6 +608,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783d..516efec80 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -175,8 +175,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da98e3e7..4570d3e31 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7517,9 +7517,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); diff --git a/mm/memory.c b/mm/memory.c index fc8b264ec..fb83cf563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3761,6 +3761,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3822,6 +3823,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4073,6 +4088,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4086,6 +4104,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index de5f69921..d3e9d1286 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1526,7 +1526,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need diff --git a/mm/readahead.c b/mm/readahead.c index ba4342804..e4b772bb7 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -483,7 +483,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -591,7 +591,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; @@ -39,6 +39,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); @@ -98,6 +99,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 71db6d8a1..cca9fda9d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3373,6 +3373,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 650ab6cfd..992a0a168 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -327,6 +327,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy) { @@ -445,6 +446,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -480,6 +490,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, enum mcopy_atomic_mode mode, bool wp_copy); #endif /* CONFIG_HUGETLB_PAGE */ @@ -601,8 +612,8 @@ retry: */ if (is_vm_hugetlb_page(dst_vma)) return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start, - src_start, len, mcopy_mode, - wp_copy); + src_start, len, mmap_changing, + mcopy_mode, wp_copy); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; diff --git a/mm/zswap.c b/mm/zswap.c index b3829ada4..b7cb12679 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1013,6 +1013,8 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); + unlock_page(page); + put_page(page); ret = -ENOMEM; goto fail; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6a1db678d..a8932d449 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 56ecc5f97..452d839c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5282,9 +5282,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn || !hci_conn_ssp_enabled(conn)) + if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) goto unlock; + /* Assume remote supports SSP since it has triggered this event */ + set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); + hci_conn_hold(conn); if (!hci_dev_test_flag(hdev, HCI_MGMT)) @@ -6716,6 +6719,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); + if (max > hcon->le_conn_max_interval) + return send_conn_param_neg_reply(hdev, handle, + HCI_ERROR_INVALID_LL_PARAMS); + if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); @@ -7245,10 +7252,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event, * keep track of the bdaddr of the connection event that woke us up. */ if (event == HCI_EV_CONN_REQUEST) { - bacpy(&hdev->wake_addr, &conn_complete->bdaddr); + bacpy(&hdev->wake_addr, &conn_request->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_CONN_COMPLETE) { - bacpy(&hdev->wake_addr, &conn_request->bdaddr); + bacpy(&hdev->wake_addr, &conn_complete->bdaddr); hdev->wake_addr_type = BDADDR_BREDR; } else if (event == HCI_EV_LE_META) { struct hci_ev_le_meta *le_ev = (void *)skb->data; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d74fe13f3..a33734046 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2251,8 +2251,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) + !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { + hci_le_del_accept_list_sync(hdev, ¶ms->addr, + params->addr_type); return 0; + } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) @@ -3780,12 +3783,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - /* Don't set Disconnect Complete when suspended as that - * would wakeup the host when disconnecting due to - * suspend. + /* Don't set Disconnect Complete and mode change when + * suspended as that would wakeup the host when disconnecting + * due to suspend. */ - if (hdev->suspended) + if (hdev->suspended) { events[0] &= 0xef; + events[2] &= 0xf7; + } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -5480,7 +5485,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) bt_dev_dbg(hdev, ""); - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) + if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4c5793053..b4cba55be 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5614,7 +5614,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - err = hci_check_conn_params(min, max, latency, to_multiplier); + if (max > hcon->le_conn_max_interval) { + BT_DBG("requested connection interval exceeds current bounds."); + err = -EINVAL; + } else { + err = hci_check_conn_params(min, max, latency, to_multiplier); + } + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else @@ -6527,7 +6533,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); l2cap_sig_send_rej(conn, cmd->ident); - break; + skb_pull(skb, len > skb->len ? skb->len : len); + continue; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c index 5c4c369f8..2faab4465 100644 --- a/net/bridge/br_cfm_netlink.c +++ b/net/bridge/br_cfm_netlink.c @@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, memset(&tx_info, 0, sizeof(tx_info)); - instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index db4f2641d..9765f9f9b 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1607,6 +1607,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3024,7 +3028,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -3867,13 +3871,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -3882,6 +3884,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -3889,6 +3893,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4023,10 +4029,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4469,13 +4477,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 202ad43e3..bff48d576 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -43,6 +43,10 @@ #include <linux/sysctl.h> #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack_core.h> +#endif + static unsigned int brnf_net_id __read_mostly; struct brnf_net { @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_STOLEN; } +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +/* conntracks' nf_confirm logic cannot handle cloned skbs referencing + * the same nf_conn entry, which will happen for multicast (broadcast) + * Frames on bridges. + * + * Example: + * macvlan0 + * br0 + * ethX ethY + * + * ethX (or Y) receives multicast or broadcast packet containing + * an IP packet, not yet in conntrack table. + * + * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. + * -> skb->_nfct now references a unconfirmed entry + * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge + * interface. + * 3. skb gets passed up the stack. + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb + * and schedules a work queue to send them out on the lower devices. + * + * The clone skb->_nfct is not a copy, it is the same entry as the + * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. + * + * The Macvlan broadcast worker and normal confirm path will race. + * + * This race will not happen if step 2 already confirmed a clone. In that + * case later steps perform skb_clone() with skb->_nfct already confirmed (in + * hash table). This works fine. + * + * But such confirmation won't happen when eb/ip/nftables rules dropped the + * packets before they reached the nf_confirm step in postrouting. + * + * Work around this problem by explicit confirmation of the entry at + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed + * entry. + * + */ +static unsigned int br_nf_local_in(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conntrack *nfct = skb_nfct(skb); + const struct nf_ct_hook *ct_hook; + struct nf_conn *ct; + int ret; + + if (!nfct || skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + ct = container_of(nfct, struct nf_conn, ct_general); + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + + WARN_ON_ONCE(skb_shared(skb)); + WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. + */ + ct_hook = rcu_dereference(nf_ct_hook); + if (!ct_hook) { + skb->_nfct = 0ul; + nf_conntrack_put(nfct); + return NF_ACCEPT; + } + + nf_bridge_pull_encap_header(skb); + ret = ct_hook->confirm(skb); + switch (ret & NF_VERDICT_MASK) { + case NF_STOLEN: + return NF_STOLEN; + default: + nf_bridge_push_encap_header(skb); + break; + } + + ct = container_of(nfct, struct nf_conn, ct_general); + WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); + + return ret; +} +#endif /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -962,6 +1050,14 @@ static const struct nf_hook_ops br_nf_ops[] = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + { + .hook = br_nf_local_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_LAST, + }, +#endif { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 06e5f6faa..51d010f64 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -77,7 +77,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1083,7 +1083,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4b3982c36..b61ef2dff 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -593,21 +593,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) - return -ENOMEM; + br_switchdev_mdb_populate(&mdb, mp); + + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) + return -ENOMEM; + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -675,51 +694,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. */ - rcu_read_lock(); + spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, @@ -780,6 +798,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 06d94b2c6..c7c27ada6 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, return nf_conntrack_in(skb, &bridge_state); } +static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + if (skb->pkt_type == PACKET_HOST) + return NF_ACCEPT; + + /* nf_conntrack_confirm() cannot handle concurrent clones, + * this happens for broad/multicast frames with e.g. macvlan on top + * of the bridge device. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) + return NF_ACCEPT; + + /* let inet prerouting call conntrack again */ + skb->_nfct = 0; + nf_ct_put(ct); + + return NF_ACCEPT; +} + static void nf_ct_bridge_frag_save(struct sk_buff *skb, struct nf_bridge_frag_data *data) { @@ -416,6 +440,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { + .hook = nf_ct_bridge_in, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, + { .hook = nf_ct_bridge_post, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f8..31a93cae5 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947..a6fb89fa6 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b0be23559..58909b365 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; @@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, diff --git a/net/core/dev.c b/net/core/dev.c index 1ba3662fa..60619fe8a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8861,7 +8861,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7674bb9f3..5cdbfbf9a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -342,7 +342,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/core/filter.c b/net/core/filter.c index 3a6110ea4..cb7c4651e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -5957,7 +5984,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7cf1e42d7..ac379e459 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -5026,10 +5026,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - struct nlattr *br_spec, *attr = NULL; + struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; - bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -5047,11 +5046,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; - have_flags = true; + br_flags_attr = attr; flags = nla_get_u16(attr); } @@ -5095,8 +5094,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - if (have_flags) - memcpy(nla_data(attr), &flags, sizeof(flags)); + if (br_flags_attr) + memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a819d0a7..d4bd10f87 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4213,8 +4213,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. + * Cap len to not accidentally hit GSO_BY_FRAGS. */ - partial_segs = len / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; if (partial_segs > 1) mss *= partial_segs; else diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 3818035ea..39643f78c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1217,8 +1217,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index b1e86a726..83906d093 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 80cdc6f6b..0323ab502 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return false; /* Get next tlv */ - total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length; + total_length += hsr_sup_tag->tlv.HSR_TLV_length; if (!pskb_may_pull(skb, total_length)) return false; skb_pull(skb, total_length); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2f646335d..9408dc3bb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1637,10 +1637,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb3..ccff96820 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 35d6e74be..bb0d1252c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1804,6 +1804,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1855,8 +1870,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2257,8 +2271,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f2ed2aed0..56776e1b1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1111,10 +1111,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e19ef88ae..a6d460aae 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1268,6 +1268,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1284,12 +1290,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c1fb7580e..a00ba2d51 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1406,12 +1406,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || @@ -1440,7 +1441,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 24961b304..328f9068c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -540,6 +540,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + static const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { @@ -614,13 +628,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); - - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, headroom)) { ip_rt_put(rt); goto tx_dropped; } + + ip_tunnel_adj_headroom(dev, headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; @@ -800,16 +814,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { + if (skb_cow_head(skb, max_headroom)) { ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); return; } + ip_tunnel_adj_headroom(dev, max_headroom); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); return; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 586b1b3e3..80ccd6661 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b80719747..d5421c38c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 407376299..fc761915c 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -279,6 +279,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb, goto free_nskb; nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip_local_out for bridged traffic, the MAC source on diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 639aa5abd..7c63b91ed 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -286,11 +286,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason; + /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); + ipv4_pktinfo_prepare(sk, skb, true); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + kfree_skb_reason(skb, reason); return NET_RX_DROP; } @@ -301,7 +303,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90e24c3f6..86e7695d9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1934,7 +1934,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 11c0e1c66..87d759bab 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2196,7 +2196,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b8dc20fe7..164837369 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -706,6 +706,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +755,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5326,7 +5341,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; @@ -5458,9 +5473,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, } addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) - return -EINVAL; - + if (!addr) { + err = -EINVAL; + goto errout; + } ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = dev_get_by_index(tgt_net, ifm->ifa_index); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a..c008d2192 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0b42eb8c5..62247621c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5fa0e3730..1cfdd9d95 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -180,6 +180,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -974,6 +976,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9125e92d9..2699915bb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 433d98bbe..71d692728 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -344,6 +344,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); + nf_ct_set_closing(skb_nfct(oldskb)); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* If we use ip6_local_out for bridged traffic, the MAC source on diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f65dc750..887599d35 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5335,19 +5335,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5355,6 +5343,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5415,8 +5409,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6ee..35508abd7 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 314ec3a51..bb92dc8b8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 19c478bd8..8e3be0009 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a2c486608..6cf0b7783 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1775,6 +1775,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c07645c99..f25dc6931 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7221,8 +7221,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - - if (beacon_ies) { + if (!beacon_ies) { /* * Wait up to one beacon interval ... * should this be more if we miss one? @@ -7295,6 +7294,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f3d6c3e4c..bd56015b2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -891,6 +891,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2db103a56..3d62e8b71 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -3044,7 +3044,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || @@ -3838,6 +3838,7 @@ begin: goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3882,7 +3883,7 @@ begin: } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { diff --git a/net/mctp/route.c b/net/mctp/route.c index 68be8f2b6..0144d8ebd 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } @@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); if (!dev) { rcu_read_unlock(); - return rc; + goto out_free; } rt->dev = __mctp_dev_get(dev); rcu_read_unlock(); @@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rt->mtu = 0; } else { - return -EINVAL; + rc = -EINVAL; + goto out_free; } spin_lock_irqsave(&rt->dev->addrs_lock, flags); @@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + /* route output functions consume the skb, even on error */ + skb = NULL; + out_release: if (!ext_rt) mctp_route_release(rt); mctp_dev_put(tmp_rt.dev); +out_free: + kfree_skb(skb); return rc; } diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a53658674..7017dd606 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,22 @@ #include <uapi/linux/mptcp.h> #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 980050f6b..3328870b0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -407,23 +407,12 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ -static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *local, + bool fullmesh, struct mptcp_addr_info *addrs) { bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); @@ -446,15 +435,28 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } @@ -603,7 +605,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - nr = fill_remote_addresses_vec(msk, fullmesh, addrs); + nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); if (nr) __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); spin_unlock_bh(&msk->pm.lock); @@ -798,18 +800,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -900,7 +902,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -942,7 +945,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -953,7 +956,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1095,7 +1098,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1311,6 +1314,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_addr_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1352,7 +1367,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); goto out_free; @@ -2014,7 +2030,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 38cbdc66d..414ed70e7 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -25,8 +25,9 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } } -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) +static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -132,10 +133,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; @@ -144,7 +156,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) @@ -186,7 +198,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -210,6 +222,40 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->local_id) == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -241,6 +287,11 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { @@ -324,7 +375,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; @@ -438,6 +489,16 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) goto destroy_err; } +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); + addr_l.family = AF_INET6; + } + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + addr_r.family = AF_INET6; + } +#endif if (addr_l.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76539d100..3bc215814 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -119,7 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->request_mptcp = 1; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); return 0; @@ -1319,6 +1319,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext = skb_ext_find(skb, SKB_EXT_MPTCP); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; + tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } @@ -1582,8 +1583,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } void __mptcp_push_pending(struct sock *sk, unsigned int flags) @@ -2336,9 +2340,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(mptcp_sk(sk))) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue @@ -2422,7 +2423,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out_release; } - dispose_it = !msk->subflow || ssk != msk->subflow->sk; + dispose_it = msk->free_first || ssk != msk->first; if (dispose_it) list_del(&subflow->node); @@ -2440,7 +2441,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { __mptcp_subflow_disconnect(ssk, subflow, flags); - msk->subflow->state = SS_UNCONNECTED; + if (msk->subflow && ssk == msk->subflow->sk) + msk->subflow->state = SS_UNCONNECTED; release_sock(ssk); goto out; @@ -3144,7 +3146,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3168,8 +3169,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, @@ -3190,6 +3233,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; @@ -3202,7 +3252,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { @@ -3341,10 +3391,10 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ mptcp_dispose_initial_subflow(msk); + + /* allow the following to close even the initial subflow */ + msk->free_first = 1; mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } @@ -3388,8 +3438,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3405,7 +3454,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4ec8e0a81..2bc37773e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -272,7 +272,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; @@ -287,7 +286,8 @@ struct mptcp_sock { cork:1, nodelay:1, fastopening:1, - in_accept_queue:1; + in_accept_queue:1, + free_first:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -475,7 +475,6 @@ struct mptcp_subflow_context { can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; u32 remote_nonce; @@ -483,7 +482,7 @@ struct mptcp_subflow_context { u32 local_nonce; u32 remote_token; u8 hmac[MPTCPOPT_HMAC_LEN]; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -529,6 +528,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -834,8 +834,6 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); -int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); @@ -911,6 +909,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 45d20e20c..891c2f4fe 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -446,7 +446,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -489,8 +489,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -499,7 +499,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1477,7 +1477,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); mptcp_info2sockaddr(remote, &addr, ssk->sk_family); @@ -1630,6 +1630,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1867,13 +1868,13 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 55a7f72d5..edf920742 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -707,6 +707,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); +void nf_ct_set_closing(struct nf_conntrack *nfct) +{ + const struct nf_ct_hook *ct_hook; + + if (!nfct) + return; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ct_hook->set_closing(nfct); + + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(nf_ct_set_closing); + bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 26ab0e961..9523104a9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d47dfdcb8..f645da82d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1156,6 +1156,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1184,6 +1185,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1195,8 +1204,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1208,8 +1215,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1223,6 +1232,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1230,6 +1241,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1240,10 +1254,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1396,9 +1416,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2364,6 +2381,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } @@ -2411,8 +2429,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7499192af..ef04e556a 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -210,6 +210,7 @@ htable_size(u8 hbits) #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -254,6 +255,7 @@ htable_size(u8 hbits) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -417,7 +419,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -437,10 +439,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); @@ -586,6 +585,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1416,6 +1424,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 5a67f7966..6bc701998 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 796026296..024f93fc8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2772,11 +2772,24 @@ err_cachep: return ret; } +static void nf_conntrack_set_closing(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = nf_ct_to_nf_conn(nfct); + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + nf_conntrack_tcp_set_closing(ct); + break; + } +} + static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, .attach = nf_conntrack_attach, + .set_closing = nf_conntrack_set_closing, + .confirm = __nf_conntrack_confirm, }; void nf_conntrack_init_end(void) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b..540d97715 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c94a9971d..7ffd69849 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -299,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3ac1af6f5..9480e638e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; @@ -911,6 +913,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +void nf_conntrack_tcp_set_closing(struct nf_conn *ct) +{ + enum tcp_conntrack old_state; + const unsigned int *timeouts; + u32 timeout; + + if (!nf_ct_is_confirmed(ct)) + return; + + spin_lock_bh(&ct->lock); + old_state = ct->proto.tcp.state; + ct->proto.tcp.state = TCP_CONNTRACK_CLOSE; + + if (old_state == TCP_CONNTRACK_CLOSE || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + spin_unlock_bh(&ct->lock); + return; + } + + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) { + const struct nf_tcp_net *tn; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = tn->timeouts; + } + + timeout = timeouts[TCP_CONNTRACK_CLOSE]; + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); + + spin_unlock_bh(&ct->lock); + + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); +} + static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) { state->td_end = 0; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index c1d99cb37..99195cf6b 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -122,12 +132,10 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: - if (!dst_hold_safe(route->tuple[dir].dst)) - return -1; - flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; @@ -148,27 +156,12 @@ static void nft_flow_dst_release(struct flow_offload *flow, dst_release(flow->tuplehash[dir].tuple.dst_cache); } -int flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) +void flow_offload_route_init(struct flow_offload *flow, + struct nf_flow_route *route) { - int err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); - if (err < 0) - return err; - - err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); - if (err < 0) - goto err_route_reply; - + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; - - return 0; - -err_route_reply: - nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); - - return err; } EXPORT_SYMBOL_GPL(flow_offload_route_init); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8a2929014..be93a0249 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1edb21382..d3ba947f4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -686,15 +686,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -703,22 +704,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -1245,6 +1246,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } @@ -2057,7 +2059,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; @@ -2458,19 +2460,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2478,17 +2476,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -4749,6 +4752,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4757,6 +4763,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } @@ -7129,11 +7139,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7141,11 +7155,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7238,7 +7252,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7252,7 +7266,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; @@ -7933,7 +7947,7 @@ err_unregister_net_hooks: return err; } -static void nft_flowtable_hooks_destroy(struct list_head *hook_list) +static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; @@ -8026,9 +8040,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8108,34 +8122,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], &flowtable_hook, flowtable, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_flowtable_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); @@ -8888,7 +8902,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; @@ -9845,7 +9859,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) - nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans)); + nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); else nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6952da7df..e4b8c02c5 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, }; @@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } @@ -351,10 +358,20 @@ static int nft_target_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -418,7 +435,7 @@ static void nft_match_eval(const struct nft_expr *expr, static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, }; @@ -600,10 +617,20 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET && ctx->family != NFPROTO_BRIDGE && ctx->family != NFPROTO_ARP) return -EOPNOTSUPP; + ret = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING)); + if (ret) + return ret; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -721,7 +748,7 @@ out_put: static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, .len = NFT_COMPAT_NAME_MAX-1 }, - [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 641dc21f9..2bfe3cdfb 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -484,6 +484,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: @@ -1231,7 +1234,30 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; + + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ + default: + return -EAFNOSUPPORT; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 3d9f6dda5..7a8707632 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; } + if (!dst_hold_safe(this_dst)) + return -ENOENT; + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); - if (!other_dst) + if (!other_dst) { + dst_release(this_dst); return -ENOENT; + } nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); @@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (flow_offload_route_init(flow, &route) < 0) - goto err_flow_add; + flow_offload_route_init(flow, &route); if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; @@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (ret < 0) goto err_flow_add; - dst_release(route.tuple[!dir].dst); return; err_flow_add: flow_offload_free(flow); err_flow_alloc: + dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4e1cc3172..e1969209b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ next_match: b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ next_match: * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -1102,6 +1102,25 @@ static void pipapo_map(struct nft_pipapo_match *m, } /** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + mem -= s->align_off; + kfree(mem); +} + +/** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions * @bsize_max: Maximum bucket size, scratch maps cover two buckets @@ -1114,12 +1133,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; + u32 align_off; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1133,14 +1153,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); - - *per_cpu_ptr(clone->scratch, i) = scratch; + pipapo_free_scratch(clone, i); #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1294,11 +1325,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1350,10 +1376,7 @@ out_lt: } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif + pipapo_free_scratch(new, i); out_scratch: free_percpu(new->scratch); kfree(new); @@ -1638,13 +1661,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -2132,7 +2151,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2140,16 +2159,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2180,9 +2189,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2236,11 +2242,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2253,11 +2256,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->dirty) nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a755915..30a3d092c 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -131,20 +131,28 @@ struct nft_pipapo_field { }; /** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working bitmap index, toggled between field matches + * @align_off: Offset to get the originally allocated address + * @map: store partial matching results during lookup + */ +struct nft_pipapo_scratch { + u8 map_index; + u32 align_off; + unsigned long map[]; +}; + +/** * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @rcu Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { int field_count; -#ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; -#endif - unsigned long * __percpu *scratch; + struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[]; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d..a3a8ddca9 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load @@ -71,9 +71,6 @@ #define NFT_PIPAPO_AVX2_ZERO(reg) \ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); - /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * @@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); - unsigned long *res, *fill, *scratch; + struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; struct nft_pipapo_match *m; struct nft_pipapo_field *f; + unsigned long *res, *fill; bool map_index; int i, ret = 0; @@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch_aligned); + scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); return false; } - map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); - res = scratch + (map_index ? m->bsize_max : 0); - fill = scratch + (map_index ? 0 : m->bsize_max); + map_index = scratch->map_index; + + res = scratch->map + (map_index ? m->bsize_max : 0); + fill = scratch->map + (map_index ? 0 : m->bsize_max); /* Starting map doesn't need to be set for this implementation */ @@ -1221,7 +1220,7 @@ next_match: out: if (i % 2) - raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + scratch->map_index = !map_index; kernel_fpu_end(); return ret >= 0; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a..5bf5572e9 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -235,7 +235,7 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +254,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 983ade4be..efb505445 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6857a4965..e9b81cba1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec5747969..f0879295d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22..2c34389c3 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f6..97944db6b 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f..5e531394a 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd7..70480869a 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924..c3bbd5880 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7535afd15..b5071a2f5 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1207,6 +1207,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ead5418c1..e3c85ceb1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -47,6 +47,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2543,13 +2544,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2600,7 +2603,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2615,7 +2619,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2658,7 +2663,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2672,7 +2678,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2692,7 +2699,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3061,7 +3069,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3109,7 +3117,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3122,7 +3131,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3150,12 +3160,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3350,7 +3364,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3421,7 +3435,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3435,7 +3449,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3445,7 +3460,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3485,7 +3501,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51882f07e..c3117350f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3284,7 +3284,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3295,8 +3295,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. */ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3566,11 +3566,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ff5f49ab2..39a6c5713 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -35,10 +35,10 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); answ = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(answ, (int __user *)arg); case SIOCPNADDRESOURCE: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 83ea13a50..607f54c23 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); @@ -930,15 +961,7 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - answ = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - answ = skb_peek(&sk->sk_receive_queue)->len; - else - answ = 0; - release_sock(sk); + answ = pep_first_packet_length(sk); ret = put_user(answ, (int __user *)arg); break; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d365..a4e3c5de9 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 0c5504068..a4ba45c43 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1314,12 +1314,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aab069701..5d91ef562 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -41,6 +41,14 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &pkt.ack, sizeof(pkt.ack)) < 0) + return; + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) + return; + } + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6e6aa02c6..249353417 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -31,7 +31,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rb_node *p; - unsigned int seq = 0; + unsigned int seq = 1; k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; @@ -41,6 +41,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, * under just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&peer->service_conn_lock, &seq); p = rcu_dereference_raw(peer->service_conns.rb_node); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 24cf0bf7c..9c4a80fce 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -45,23 +45,6 @@ if NET_SCHED comment "Queueing/Scheduling" -config NET_SCH_CBQ - tristate "Class Based Queueing (CBQ)" - help - Say Y here if you want to use the Class-Based Queueing (CBQ) packet - scheduling algorithm. This algorithm classifies the waiting packets - into a tree-like hierarchy of classes; the leaves of this tree are - in turn scheduled by separate algorithms. - - See the top of <file:net/sched/sch_cbq.c> for more details. - - CBQ is a commonly used scheduler, so if you're unsure, you should - say Y here. Then say Y to all the queueing algorithms below that you - want to use as leaf disciplines. - - To compile this code as a module, choose M here: the - module will be called sch_cbq. - config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" help @@ -85,20 +68,6 @@ config NET_SCH_HFSC To compile this code as a module, choose M here: the module will be called sch_hfsc. -config NET_SCH_ATM - tristate "ATM Virtual Circuits (ATM)" - depends on ATM - help - Say Y here if you want to use the ATM pseudo-scheduler. This - provides a framework for invoking classifiers, which in turn - select classes of this queuing discipline. Each class maps - the flow(s) it is handling to a given virtual circuit. - - See the top of <file:net/sched/sch_atm.c> for more details. - - To compile this code as a module, choose M here: the - module will be called sch_atm. - config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" help @@ -217,17 +186,6 @@ config NET_SCH_GRED To compile this code as a module, choose M here: the module will be called sch_gred. -config NET_SCH_DSMARK - tristate "Differentiated Services marker (DSMARK)" - help - Say Y if you want to schedule packets according to the - Differentiated Services architecture proposed in RFC 2475. - Technical information on this method, with pointers to associated - RFCs, is available at <http://www.gta.ufrj.br/diffserv/>. - - To compile this code as a module, choose M here: the - module will be called sch_dsmark. - config NET_SCH_NETEM tristate "Network emulator (NETEM)" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 8a33a35fc..a66ac1e7b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,20 +33,17 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o -obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o -obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o -obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c deleted file mode 100644 index 4a981ca90..000000000 --- a/net/sched/sch_atm.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/atmdev.h> -#include <linux/atmclip.h> -#include <linux/rtnetlink.h> -#include <linux/file.h> /* for fput */ -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> - -/* - * The ATM queuing discipline provides a framework for invoking classifiers - * (aka "filters"), which in turn select classes of this queuing discipline. - * Each class maps the flow(s) it is handling to a given VC. Multiple classes - * may share the same VC. - * - * When creating a class, VCs are specified by passing the number of the open - * socket descriptor by which the calling process references the VC. The kernel - * keeps the VC open at least until all classes using it are removed. - * - * In this file, most functions are named atm_tc_* to avoid confusion with all - * the atm_* in net/atm. This naming convention differs from what's used in the - * rest of net/sched. - * - * Known bugs: - * - sometimes messes up the IP stack - * - any manipulations besides the few operations described in the README, are - * untested and likely to crash the system - * - should lock the flow while there is data in the queue (?) - */ - -#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - -struct atm_flow_data { - struct Qdisc_class_common common; - struct Qdisc *q; /* FIFO, TBF, etc. */ - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc, - struct sk_buff *skb); /* chaining */ - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; - NULL to set CLP instead */ - int hdr_len; - unsigned char hdr[]; /* header data; MUST BE LAST */ -}; - -struct atm_qdisc_data { - struct atm_flow_data link; /* unclassified skbs go here */ - struct list_head flows; /* NB: "link" is also on this - list */ - struct tasklet_struct task; /* dequeue tasklet */ -}; - -/* ------------------------- Class/flow operations ------------------------- */ - -static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - list_for_each_entry(flow, &p->flows, list) { - if (flow->common.classid == classid) - return flow; - } - return NULL; -} - -static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", - sch, p, flow, new, old); - if (list_empty(&flow->list)) - return -EINVAL; - if (!new) - new = &noop_qdisc; - *old = flow->q; - flow->q = new; - if (*old) - qdisc_reset(*old); - return 0; -} - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); - return flow ? flow->q : NULL; -} - -static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -/* - * atm_tc_put handles all destructions, including the ones that are explicitly - * requested (atm_tc_destroy, etc.). The assumption here is that we never drop - * anything that still seems to be in use. - */ -static void atm_tc_put(struct Qdisc *sch, unsigned long cl) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (--flow->ref) - return; - pr_debug("atm_tc_put: destroying\n"); - list_del_init(&flow->list); - pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_put(flow->q); - tcf_block_put(flow->block); - if (flow->sock) { - pr_debug("atm_tc_put: f_count %ld\n", - file_count(flow->sock->file)); - flow->vcc->pop = flow->old_pop; - sockfd_put(flow->sock); - } - if (flow->excess) - atm_tc_put(sch, (unsigned long)flow->excess); - if (flow != &p->link) - kfree(flow); - /* - * If flow == &p->link, the qdisc no longer works at this point and - * needs to be removed. (By the caller of atm_tc_put.) - */ -} - -static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - - pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); - VCC2FLOW(vcc)->old_pop(vcc, skb); - tasklet_schedule(&p->task); -} - -static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, 0x00, - 0x08, 0x00 -}; /* Ethertype IP (0800) */ - -static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { - [TCA_ATM_FD] = { .type = NLA_U32 }, - [TCA_ATM_EXCESS] = { .type = NLA_U32 }, -}; - -static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)*arg; - struct atm_flow_data *excess = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_ATM_MAX + 1]; - struct socket *sock; - int fd, error, hdr_len; - void *hdr; - - pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); - /* - * The concept of parents doesn't apply for this qdisc. - */ - if (parent && parent != TC_H_ROOT && parent != sch->handle) - return -EINVAL; - /* - * ATM classes cannot be changed. In order to change properties of the - * ATM connection, that socket needs to be modified directly (via the - * native ATM API. In order to send a flow to a different VC, the old - * class needs to be removed and a new one added. (This may be changed - * later.) - */ - if (flow) - return -EBUSY; - if (opt == NULL) - return -EINVAL; - - error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, - NULL); - if (error < 0) - return error; - - if (!tb[TCA_ATM_FD]) - return -EINVAL; - fd = nla_get_u32(tb[TCA_ATM_FD]); - pr_debug("atm_tc_change: fd %d\n", fd); - if (tb[TCA_ATM_HDR]) { - hdr_len = nla_len(tb[TCA_ATM_HDR]); - hdr = nla_data(tb[TCA_ATM_HDR]); - } else { - hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ - } - if (!tb[TCA_ATM_EXCESS]) - excess = NULL; - else { - excess = (struct atm_flow_data *) - atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); - if (!excess) - return -ENOENT; - } - pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->nla_type, nla_len(opt), hdr_len); - sock = sockfd_lookup(fd, &error); - if (!sock) - return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); - if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { - error = -EPROTOTYPE; - goto err_out; - } - /* @@@ should check if the socket is really operational or we'll crash - on vcc->send */ - if (classid) { - if (TC_H_MAJ(classid ^ sch->handle)) { - pr_debug("atm_tc_change: classid mismatch\n"); - error = -EINVAL; - goto err_out; - } - } else { - int i; - unsigned long cl; - - for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_find(sch, classid); - if (!cl) - break; - } - } - pr_debug("atm_tc_change: new id %x\n", classid); - flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); - pr_debug("atm_tc_change: flow %p\n", flow); - if (!flow) { - error = -ENOBUFS; - goto err_out; - } - - error = tcf_block_get(&flow->block, &flow->filter_list, sch, - extack); - if (error) { - kfree(flow); - goto err_out; - } - - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - extack); - if (!flow->q) - flow->q = &noop_qdisc; - pr_debug("atm_tc_change: qdisc %p\n", flow->q); - flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ - flow->vcc->user_back = flow; - pr_debug("atm_tc_change: vcc %p\n", flow->vcc); - flow->old_pop = flow->vcc->pop; - flow->parent = p; - flow->vcc->pop = sch_atm_pop; - flow->common.classid = classid; - flow->ref = 1; - flow->excess = excess; - list_add(&flow->list, &p->link.list); - flow->hdr_len = hdr_len; - if (hdr) - memcpy(flow->hdr, hdr, hdr_len); - else - memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); - *arg = (unsigned long)flow; - return 0; -err_out: - sockfd_put(sock); - return error; -} - -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (list_empty(&flow->list)) - return -EINVAL; - if (rcu_access_pointer(flow->filter_list) || flow == &p->link) - return -EBUSY; - /* - * Reference count must be 2: one for "keepalive" (set at class - * creation), and one for the reference held when calling delete. - */ - if (flow->ref < 2) { - pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); - return -EINVAL; - } - if (flow->ref > 2) - return -EBUSY; /* catch references via excess, etc. */ - atm_tc_put(sch, arg); - return 0; -} - -static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); - if (walker->stop) - return; - list_for_each_entry(flow, &p->flows, list) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) - break; - } -} - -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? flow->block : p->link.block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - struct tcf_result res; - int result; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - - pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_ACT_OK; /* be nice to gcc */ - flow = NULL; - if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { - struct tcf_proto *fl; - - list_for_each_entry(flow, &p->flows, list) { - fl = rcu_dereference_bh(flow->filter_list); - if (fl) { - result = tcf_classify(skb, NULL, fl, &res, true); - if (result < 0) - continue; - if (result == TC_ACT_SHOT) - goto done; - - flow = (struct atm_flow_data *)res.class; - if (!flow) - flow = lookup_flow(sch, res.classid); - goto drop; - } - } - flow = NULL; -done: - ; - } - if (!flow) { - flow = &p->link; - } else { - if (flow->vcc) - ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - __qdisc_drop(skb, to_free); - goto drop; - case TC_ACT_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } -#endif - } - - ret = qdisc_enqueue(skb, flow->q, to_free); - if (ret != NET_XMIT_SUCCESS) { -drop: __maybe_unused - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - if (flow) - flow->qstats.drops++; - } - return ret; - } - /* - * Okay, this may seem weird. We pretend we've dropped the packet if - * it goes via ATM. The reason for this is that the outer qdisc - * expects to be able to q->dequeue the packet later on if we return - * success at this place. Also, sch->q.qdisc needs to reflect whether - * there is a packet egligible for dequeuing or not. Note that the - * statistics of the outer qdisc are necessarily wrong because of all - * this. There's currently no correct solution for this. - */ - if (flow == &p->link) { - sch->q.qlen++; - return NET_XMIT_SUCCESS; - } - tasklet_schedule(&p->task); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -/* - * Dequeue packets and send them over ATM. Note that we quite deliberately - * avoid checking net_device's flow control here, simply because sch_atm - * uses its own channels, which have nothing to do with any CLIP/LANE/or - * non-ATM interfaces. - */ - -static void sch_atm_dequeue(struct tasklet_struct *t) -{ - struct atm_qdisc_data *p = from_tasklet(p, t, task); - struct Qdisc *sch = qdisc_from_priv(p); - struct atm_flow_data *flow; - struct sk_buff *skb; - - pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow == &p->link) - continue; - /* - * If traffic is properly shaped, this won't generate nasty - * little bursts. Otherwise, it may ... (but that's okay) - */ - while ((skb = flow->q->ops->peek(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) - break; - - skb = qdisc_dequeue_peeked(flow->q); - if (unlikely(!skb)) - break; - - qdisc_bstats_update(sch, skb); - bstats_update(&flow->bstats, skb); - pr_debug("atm_tc_dequeue: sending on class %p\n", flow); - /* remove any LL header somebody else has attached */ - skb_pull(skb, skb_network_offset(skb)); - if (skb_headroom(skb) < flow->hdr_len) { - struct sk_buff *new; - - new = skb_realloc_headroom(skb, flow->hdr_len); - dev_kfree_skb(skb); - if (!new) - continue; - skb = new; - } - pr_debug("sch_atm_dequeue: ip %p, data %p\n", - skb_network_header(skb), skb->data); - ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb, flow->hdr_len), flow->hdr, - flow->hdr_len); - refcount_add(skb->truesize, - &sk_atm(flow->vcc)->sk_wmem_alloc); - /* atm.atm_options are already set by atm_tc_enqueue */ - flow->vcc->send(flow->vcc, skb); - } - } -} - -static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - - pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); - tasklet_schedule(&p->task); - skb = qdisc_dequeue_peeked(p->link.q); - if (skb) - sch->q.qlen--; - return skb; -} - -static struct sk_buff *atm_tc_peek(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - - pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); - - return p->link.q->ops->peek(p->link.q); -} - -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); - if (!p->link.q) - p->link.q = &noop_qdisc; - pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; - - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, - extack); - if (err) - return err; - - tasklet_setup(&p->task, sch_atm_dequeue); - return 0; -} - -static void atm_tc_reset(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) - qdisc_reset(flow->q); -} - -static void atm_tc_destroy(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow, *tmp; - - pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - tcf_block_put(flow->block); - flow->block = NULL; - } - - list_for_each_entry_safe(flow, tmp, &p->flows, list) { - if (flow->ref > 1) - pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); - atm_tc_put(sch, (unsigned long)flow); - } - tasklet_kill(&p->task); -} - -static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct nlattr *nest; - - pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch, p, flow, skb, tcm); - if (list_empty(&flow->list)) - return -EINVAL; - tcm->tcm_handle = flow->common.classid; - tcm->tcm_info = flow->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) - goto nla_put_failure; - if (flow->vcc) { - struct sockaddr_atmpvc pvc; - int state; - - memset(&pvc, 0, sizeof(pvc)); - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; - pvc.sap_addr.vpi = flow->vcc->vpi; - pvc.sap_addr.vci = flow->vcc->vci; - if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) - goto nla_put_failure; - state = ATM_VF2VS(flow->vcc->flags); - if (nla_put_u32(skb, TCA_ATM_STATE, state)) - goto nla_put_failure; - } - if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) - goto nla_put_failure; - } else { - if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) - goto nla_put_failure; - } - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} -static int -atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - - return 0; -} - -static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - return 0; -} - -static const struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .find = atm_tc_find, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_block = atm_tc_tcf_block, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, -}; - -static struct Qdisc_ops atm_qdisc_ops __read_mostly = { - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .peek = atm_tc_peek, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .dump = atm_tc_dump, - .owner = THIS_MODULE, -}; - -static int __init atm_init(void) -{ - return register_qdisc(&atm_qdisc_ops); -} - -static void __exit atm_exit(void) -{ - unregister_qdisc(&atm_qdisc_ops); -} - -module_init(atm_init) -module_exit(atm_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c deleted file mode 100644 index 36db5f678..000000000 --- a/net/sched/sch_cbq.c +++ /dev/null @@ -1,1727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/sch_cbq.c Class-Based Queueing discipline. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> - - -/* Class-Based Queueing (CBQ) algorithm. - ======================================= - - Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", - IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 - - [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 - - [3] Sally Floyd, "Notes on Class-Based Queueing: Setting - Parameters", 1996 - - [4] Sally Floyd and Michael Speer, "Experimental Results - for Class-Based Queueing", 1998, not published. - - ----------------------------------------------------------------------- - - Algorithm skeleton was taken from NS simulator cbq.cc. - If someone wants to check this code against the LBL version, - he should take into account that ONLY the skeleton was borrowed, - the implementation is different. Particularly: - - --- The WRR algorithm is different. Our version looks more - reasonable (I hope) and works when quanta are allowed to be - less than MTU, which is always the case when real time classes - have small rates. Note, that the statement of [3] is - incomplete, delay may actually be estimated even if class - per-round allotment is less than MTU. Namely, if per-round - allotment is W*r_i, and r_1+...+r_k = r < 1 - - delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B - - In the worst case we have IntServ estimate with D = W*r+k*MTU - and C = MTU*r. The proof (if correct at all) is trivial. - - - --- It seems that cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translations - from NS. Anyone is advised to find these differences - and explain to me, why I am wrong 8). - - --- Linux has no EOI event, so that we cannot estimate true class - idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. This is wrong because of - internal device queueing, but on a permanently loaded link it is true. - Moreover, combined with clock integrator, this scheme looks - very close to an ideal solution. */ - -struct cbq_sched_data; - - -struct cbq_class { - struct Qdisc_class_common common; - struct cbq_class *next_alive; /* next class with backlog in this priority band */ - -/* Parameters */ - unsigned char priority; /* class priority */ - unsigned char priority2; /* priority to be used after overlimit */ - unsigned char ewma_log; /* time constant for idle time calculation */ - - u32 defmap; - - /* Link-sharing scheduler parameters */ - long maxidle; /* Class parameters: see below. */ - long offtime; - long minidle; - u32 avpkt; - struct qdisc_rate_table *R_tab; - - /* General scheduler (WRR) parameters */ - long allot; - long quantum; /* Allotment per WRR round */ - long weight; /* Relative allotment: see below */ - - struct Qdisc *qdisc; /* Ptr to CBQ discipline */ - struct cbq_class *split; /* Ptr to split node */ - struct cbq_class *share; /* Ptr to LS parent in the class tree */ - struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ - struct cbq_class *borrow; /* NULL if class is bandwidth limited; - parent otherwise */ - struct cbq_class *sibling; /* Sibling chain */ - struct cbq_class *children; /* Pointer to children chain */ - - struct Qdisc *q; /* Elementary queueing discipline */ - - -/* Variables */ - unsigned char cpriority; /* Effective priority */ - unsigned char delayed; - unsigned char level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of children + 1 for nodes. - */ - - psched_time_t last; /* Last end of service */ - psched_time_t undertime; - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; - - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - - int filters; - - struct cbq_class *defaults[TC_PRIO_MAX + 1]; -}; - -struct cbq_sched_data { - struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO + 1]; - unsigned int quanta[TC_CBQ_MAXPRIO + 1]; - - struct cbq_class link; - - unsigned int activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes - with backlog */ - -#ifdef CONFIG_NET_CLS_ACT - struct cbq_class *rx_class; -#endif - struct cbq_class *tx_class; - struct cbq_class *tx_borrowed; - int tx_len; - psched_time_t now; /* Cached timestamp */ - unsigned int pmask; - - struct qdisc_watchdog watchdog; /* Watchdog timer, - started when CBQ has - backlog, but cannot - transmit just now */ - psched_tdiff_t wd_expires; - int toplevel; - u32 hgenerator; -}; - - -#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) - -static inline struct cbq_class * -cbq_class_lookup(struct cbq_sched_data *q, u32 classid) -{ - struct Qdisc_class_common *clc; - - clc = qdisc_class_find(&q->clhash, classid); - if (clc == NULL) - return NULL; - return container_of(clc, struct cbq_class, common); -} - -#ifdef CONFIG_NET_CLS_ACT - -static struct cbq_class * -cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) -{ - struct cbq_class *cl; - - for (cl = this->tparent; cl; cl = cl->tparent) { - struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; - - if (new != NULL && new != this) - return new; - } - return NULL; -} - -#endif - -/* Classify packet. The procedure is pretty complicated, but - * it allows us to combine link sharing and priority scheduling - * transparently. - * - * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - * so that it resolves to split nodes. Then packets are classified - * by logical priority, or a more specific classifier may be attached - * to the split node. - */ - -static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *head = &q->link; - struct cbq_class **defmap; - struct cbq_class *cl = NULL; - u32 prio = skb->priority; - struct tcf_proto *fl; - struct tcf_result res; - - /* - * Step 1. If skb->priority points to one of our classes, use it. - */ - if (TC_H_MAJ(prio ^ sch->handle) == 0 && - (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; - - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - for (;;) { - int result = 0; - defmap = head->defaults; - - fl = rcu_dereference_bh(head->filter_list); - /* - * Step 2+n. Apply classifier. - */ - result = tcf_classify(skb, NULL, fl, &res, true); - if (!fl || result < 0) - goto fallback; - if (result == TC_ACT_SHOT) - return NULL; - - cl = (void *)res.class; - if (!cl) { - if (TC_H_MAJ(res.classid)) - cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) - cl = defmap[TC_PRIO_BESTEFFORT]; - - if (cl == NULL) - goto fallback; - } - if (cl->level >= head->level) - goto fallback; -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - fallthrough; - case TC_ACT_RECLASSIFY: - return cbq_reclassify(skb, cl); - } -#endif - if (cl->level == 0) - return cl; - - /* - * Step 3+n. If classifier selected a link sharing class, - * apply agency specific classifier. - * Repeat this procedure until we hit a leaf node. - */ - head = cl; - } - -fallback: - cl = head; - - /* - * Step 4. No success... - */ - if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio & TC_PRIO_MAX]) && - !(cl = head->defaults[TC_PRIO_BESTEFFORT])) - return head; - - return cl; -} - -/* - * A packet has just been enqueued on the empty class. - * cbq_activate_class adds it to the tail of active class list - * of its priority band. - */ - -static inline void cbq_activate_class(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - int prio = cl->cpriority; - struct cbq_class *cl_tail; - - cl_tail = q->active[prio]; - q->active[prio] = cl; - - if (cl_tail != NULL) { - cl->next_alive = cl_tail->next_alive; - cl_tail->next_alive = cl; - } else { - cl->next_alive = cl; - q->activemask |= (1<<prio); - } -} - -/* - * Unlink class from active chain. - * Note that this same procedure is done directly in cbq_dequeue* - * during round-robin procedure. - */ - -static void cbq_deactivate_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - int prio = this->cpriority; - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<<prio); - return; - } - } - return; - } - } while ((cl_prev = cl) != q->active[prio]); -} - -static void -cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) -{ - int toplevel = q->toplevel; - - if (toplevel > cl->level) { - psched_time_t now = psched_get_time(); - - do { - if (cl->undertime < now) { - q->toplevel = cl->level; - return; - } - } while ((cl = cl->borrow) != NULL && toplevel > cl->level); - } -} - -static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - int ret; - struct cbq_class *cl = cbq_classify(skb, sch, &ret); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; -#endif - if (cl == NULL) { - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; - } - - ret = qdisc_enqueue(skb, cl->q, to_free); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } - return ret; -} - -/* Overlimit action: penalize leaf class by adding offtime */ -static void cbq_overlimit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (!cl->delayed) { - delay += cl->offtime; - - /* - * Class goes to sleep, so that it will have no - * chance to work avgidle. Let's forgive it 8) - * - * BTW cbq-2.0 has a crap in this - * place, apparently they forgot to shift it by cl->ewma_log. - */ - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - if (delay <= 0) - delay = 1; - cl->undertime = q->now + delay; - - cl->xstats.overactions++; - cl->delayed = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; - - /* Dirty work! We must schedule wakeups based on - * real available rate, rather than leaf rate, - * which may be tiny (even zero). - */ - if (q->toplevel == TC_CBQ_MAXLEVEL) { - struct cbq_class *b; - psched_tdiff_t base_delay = q->wd_expires; - - for (b = cl->borrow; b; b = b->borrow) { - delay = b->undertime - q->now; - if (delay < base_delay) { - if (delay <= 0) - delay = 1; - base_delay = delay; - } - } - - q->wd_expires = base_delay; - } -} - -/* - * It is mission critical procedure. - * - * We "regenerate" toplevel cutoff, if transmitting class - * has backlog and it is not regulated. It is not part of - * original CBQ description, but looks more reasonable. - * Probably, it is wrong. This question needs further investigation. - */ - -static inline void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, - struct cbq_class *borrowed) -{ - if (cl && q->toplevel >= borrowed->level) { - if (cl->q->q.qlen > 1) { - do { - if (borrowed->undertime == PSCHED_PASTPERFECT) { - q->toplevel = borrowed->level; - return; - } - } while ((borrowed = borrowed->borrow) != NULL); - } -#if 0 - /* It is not necessary now. Uncommenting it - will save CPU cycles, but decrease fairness. - */ - q->toplevel = TC_CBQ_MAXLEVEL; -#endif - } -} - -static void -cbq_update(struct cbq_sched_data *q) -{ - struct cbq_class *this = q->tx_class; - struct cbq_class *cl = this; - int len = q->tx_len; - psched_time_t now; - - q->tx_class = NULL; - /* Time integrator. We calculate EOS time - * by adding expected packet transmission time. - */ - now = q->now + L2T(&q->link, len); - - for ( ; cl; cl = cl->share) { - long avgidle = cl->avgidle; - long idle; - - _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. - * (last_pktlen/rate) is "virtual" busy time, so that - * - * idle = (now - last) - last_pktlen/rate - */ - - idle = now - cl->last; - if ((unsigned long)idle > 128*1024*1024) { - avgidle = cl->maxidle; - } else { - idle -= L2T(cl, len); - - /* true_avgidle := (1-W)*true_avgidle + W*idle, - * where W=2^{-ewma_log}. But cl->avgidle is scaled: - * cl->avgidle == true_avgidle/W, - * hence: - */ - avgidle += idle - (avgidle>>cl->ewma_log); - } - - if (avgidle <= 0) { - /* Overlimit or at-limit */ - - if (avgidle < cl->minidle) - avgidle = cl->minidle; - - cl->avgidle = avgidle; - - /* Calculate expected time, when this class - * will be allowed to send. - * It will occur, when: - * (1-W)*true_avgidle + W*delay = 0, i.e. - * idle = (1/W - 1)*(-true_avgidle) - * or - * idle = (1 - W)*(-cl->avgidle); - */ - idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); - - /* - * That is not all. - * To maintain the rate allocated to the class, - * we add to undertime virtual clock, - * necessary to complete transmitted packet. - * (len/phys_bandwidth has been already passed - * to the moment of cbq_update) - */ - - idle -= L2T(&q->link, len); - idle += L2T(cl, len); - - cl->undertime = now + idle; - } else { - /* Underlimit */ - - cl->undertime = PSCHED_PASTPERFECT; - if (avgidle > cl->maxidle) - cl->avgidle = cl->maxidle; - else - cl->avgidle = avgidle; - } - if ((s64)(now - cl->last) > 0) - cl->last = now; - } - - cbq_update_toplevel(q, this, q->tx_borrowed); -} - -static inline struct cbq_class * -cbq_under_limit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this_cl = cl; - - if (cl->tparent == NULL) - return cl; - - if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { - cl->delayed = 0; - return cl; - } - - do { - /* It is very suspicious place. Now overlimit - * action is generated for not bounded classes - * only if link is completely congested. - * Though it is in agree with ancestor-only paradigm, - * it looks very stupid. Particularly, - * it means that this chunk of code will either - * never be called or result in strong amplification - * of burstiness. Dangerous, silly, and, however, - * no another solution exists. - */ - cl = cl->borrow; - if (!cl) { - this_cl->qstats.overlimits++; - cbq_overlimit(this_cl); - return NULL; - } - if (cl->level > q->toplevel) - return NULL; - } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); - - cl->delayed = 0; - return cl; -} - -static inline struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl_tail, *cl_prev, *cl; - struct sk_buff *skb; - int deficit; - - cl_tail = cl_prev = q->active[prio]; - cl = cl_prev->next_alive; - - do { - deficit = 0; - - /* Start round */ - do { - struct cbq_class *borrow = cl; - - if (cl->q->q.qlen && - (borrow = cbq_under_limit(cl)) == NULL) - goto skip_class; - - if (cl->deficit <= 0) { - /* Class exhausted its allotment per - * this round. Switch to the next one. - */ - deficit = 1; - cl->deficit += cl->quantum; - goto next_class; - } - - skb = cl->q->dequeue(cl->q); - - /* Class did not give us any skb :-( - * It could occur even if cl->q->q.qlen != 0 - * f.e. if cl->q == "tbf" - */ - if (skb == NULL) - goto skip_class; - - cl->deficit -= qdisc_pkt_len(skb); - q->tx_class = cl; - q->tx_borrowed = borrow; - if (borrow != cl) { -#ifndef CBQ_XSTATS_BORROWS_BYTES - borrow->xstats.borrows++; - cl->xstats.borrows++; -#else - borrow->xstats.borrows += qdisc_pkt_len(skb); - cl->xstats.borrows += qdisc_pkt_len(skb); -#endif - } - q->tx_len = qdisc_pkt_len(skb); - - if (cl->deficit <= 0) { - q->active[prio] = cl; - cl = cl->next_alive; - cl->deficit += cl->quantum; - } - return skb; - -skip_class: - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { - /* Class is empty or penalized. - * Unlink it from active chain. - */ - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - /* Did cl_tail point to it? */ - if (cl == cl_tail) { - /* Repair it! */ - cl_tail = cl_prev; - - /* Was it the last class in this band? */ - if (cl == cl_tail) { - /* Kill the band! */ - q->active[prio] = NULL; - q->activemask &= ~(1<<prio); - if (cl->q->q.qlen) - cbq_activate_class(cl); - return NULL; - } - - q->active[prio] = cl_tail; - } - if (cl->q->q.qlen) - cbq_activate_class(cl); - - cl = cl_prev; - } - -next_class: - cl_prev = cl; - cl = cl->next_alive; - } while (cl_prev != cl_tail); - } while (deficit); - - q->active[prio] = cl_prev; - - return NULL; -} - -static inline struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - unsigned int activemask; - - activemask = q->activemask & 0xFF; - while (activemask) { - int prio = ffz(~activemask); - activemask &= ~(1<<prio); - skb = cbq_dequeue_prio(sch, prio); - if (skb) - return skb; - } - return NULL; -} - -static struct sk_buff * -cbq_dequeue(struct Qdisc *sch) -{ - struct sk_buff *skb; - struct cbq_sched_data *q = qdisc_priv(sch); - psched_time_t now; - - now = psched_get_time(); - - if (q->tx_class) - cbq_update(q); - - q->now = now; - - for (;;) { - q->wd_expires = 0; - - skb = cbq_dequeue_1(sch); - if (skb) { - qdisc_bstats_update(sch, skb); - sch->q.qlen--; - return skb; - } - - /* All the classes are overlimit. - * - * It is possible, if: - * - * 1. Scheduler is empty. - * 2. Toplevel cutoff inhibited borrowing. - * 3. Root class is overlimit. - * - * Reset 2d and 3d conditions and retry. - * - * Note, that NS and cbq-2.0 are buggy, peeking - * an arbitrary class is appropriate for ancestor-only - * sharing, but not for toplevel algorithm. - * - * Our version is better, but slower, because it requires - * two passes, but it is unavoidable with top-level sharing. - */ - - if (q->toplevel == TC_CBQ_MAXLEVEL && - q->link.undertime == PSCHED_PASTPERFECT) - break; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->link.undertime = PSCHED_PASTPERFECT; - } - - /* No packets in scheduler or nobody wants to give them to us :-( - * Sigh... start watchdog timer in the last case. - */ - - if (sch->q.qlen) { - qdisc_qstats_overlimit(sch); - if (q->wd_expires) - qdisc_watchdog_schedule(&q->watchdog, - now + q->wd_expires); - } - return NULL; -} - -/* CBQ class maintenance routines */ - -static void cbq_adjust_levels(struct cbq_class *this) -{ - if (this == NULL) - return; - - do { - int level = 0; - struct cbq_class *cl; - - cl = this->children; - if (cl) { - do { - if (cl->level > level) - level = cl->level; - } while ((cl = cl->sibling) != this->children); - } - this->level = level + 1; - } while ((this = this->tparent) != NULL); -} - -static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) -{ - struct cbq_class *cl; - unsigned int h; - - if (q->quanta[prio] == 0) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - /* BUGGGG... Beware! This expression suffer of - * arithmetic overflows! - */ - if (cl->priority == prio) { - cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ - q->quanta[prio]; - } - if (cl->quantum <= 0 || - cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); - cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; - } - } - } -} - -static void cbq_sync_defmap(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *split = cl->split; - unsigned int h; - int i; - - if (split == NULL) - return; - - for (i = 0; i <= TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap & (1<<i))) - split->defaults[i] = NULL; - } - - for (i = 0; i <= TC_PRIO_MAX; i++) { - int level = split->level; - - if (split->defaults[i]) - continue; - - for (h = 0; h < q->clhash.hashsize; h++) { - struct cbq_class *c; - - hlist_for_each_entry(c, &q->clhash.hash[h], - common.hnode) { - if (c->split == split && c->level < level && - c->defmap & (1<<i)) { - split->defaults[i] = c; - level = c->level; - } - } - } - } -} - -static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) -{ - struct cbq_class *split = NULL; - - if (splitid == 0) { - split = cl->split; - if (!split) - return; - splitid = split->common.classid; - } - - if (split == NULL || split->common.classid != splitid) { - for (split = cl->tparent; split; split = split->tparent) - if (split->common.classid == splitid) - break; - } - - if (split == NULL) - return; - - if (cl->split != split) { - cl->defmap = 0; - cbq_sync_defmap(cl); - cl->split = split; - cl->defmap = def & mask; - } else - cl->defmap = (cl->defmap & ~mask) | (def & mask); - - cbq_sync_defmap(cl); -} - -static void cbq_unlink_class(struct cbq_class *this) -{ - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - - qdisc_class_hash_remove(&q->clhash, &this->common); - - if (this->tparent) { - clp = &this->sibling; - cl = *clp; - do { - if (cl == this) { - *clp = cl->sibling; - break; - } - clp = &cl->sibling; - } while ((cl = *clp) != this->sibling); - - if (this->tparent->children == this) { - this->tparent->children = this->sibling; - if (this->sibling == this) - this->tparent->children = NULL; - } - } else { - WARN_ON(this->sibling != this); - } -} - -static void cbq_link_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - struct cbq_class *parent = this->tparent; - - this->sibling = this; - qdisc_class_hash_insert(&q->clhash, &this->common); - - if (parent == NULL) - return; - - if (parent->children == NULL) { - parent->children = this; - } else { - this->sibling = parent->children->sibling; - parent->children->sibling = this; - } -} - -static void -cbq_reset(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int prio; - unsigned int h; - - q->activemask = 0; - q->pmask = 0; - q->tx_class = NULL; - q->tx_borrowed = NULL; - qdisc_watchdog_cancel(&q->watchdog); - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) - q->active[prio] = NULL; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - qdisc_reset(cl->q); - - cl->next_alive = NULL; - cl->undertime = PSCHED_PASTPERFECT; - cl->avgidle = cl->maxidle; - cl->deficit = cl->quantum; - cl->cpriority = cl->priority; - } - } -} - - -static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) -{ - if (lss->change & TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; - } - if (lss->change & TCF_CBQ_LSS_EWMA) - cl->ewma_log = lss->ewma_log; - if (lss->change & TCF_CBQ_LSS_AVPKT) - cl->avpkt = lss->avpkt; - if (lss->change & TCF_CBQ_LSS_MINIDLE) - cl->minidle = -(long)lss->minidle; - if (lss->change & TCF_CBQ_LSS_MAXIDLE) { - cl->maxidle = lss->maxidle; - cl->avgidle = lss->maxidle; - } - if (lss->change & TCF_CBQ_LSS_OFFTIME) - cl->offtime = lss->offtime; -} - -static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]--; - q->quanta[cl->priority] -= cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]++; - q->quanta[cl->priority] += cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - if (wrr->allot) - cl->allot = wrr->allot; - if (wrr->weight) - cl->weight = wrr->weight; - if (wrr->priority) { - cl->priority = wrr->priority - 1; - cl->cpriority = cl->priority; - if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO - 1; - } - - cbq_addprio(q, cl); - return 0; -} - -static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) -{ - cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); - return 0; -} - -static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { - [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) }, - [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) }, - [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) }, - [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) }, - [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) }, - [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, - [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, -}; - -static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], - struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - int err; - - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, - cbq_policy, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_WRROPT]) { - const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); - - if (wrr->priority > TC_CBQ_MAXPRIO) { - NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); - err = -EINVAL; - } - } - return err; -} - -static int cbq_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct tc_ratespec *r; - int err; - - qdisc_watchdog_init(&q->watchdog, sch); - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { - NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); - return -EINVAL; - } - - r = nla_data(tb[TCA_CBQ_RATE]); - - q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); - if (!q->link.R_tab) - return -EINVAL; - - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); - if (err) - goto put_rtab; - - err = qdisc_class_hash_init(&q->clhash); - if (err < 0) - goto put_block; - - q->link.sibling = &q->link; - q->link.common.classid = sch->handle; - q->link.qdisc = sch; - q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (!q->link.q) - q->link.q = &noop_qdisc; - else - qdisc_hash_add(q->link.q, true); - - q->link.priority = TC_CBQ_MAXPRIO - 1; - q->link.priority2 = TC_CBQ_MAXPRIO - 1; - q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.allot = psched_mtu(qdisc_dev(sch)); - q->link.quantum = q->link.allot; - q->link.weight = q->link.R_tab->rate.rate; - - q->link.ewma_log = TC_CBQ_DEF_EWMA; - q->link.avpkt = q->link.allot/2; - q->link.minidle = -0x7FFFFFFF; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - cbq_link_class(&q->link); - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT])); - - cbq_addprio(q, &q->link); - return 0; - -put_block: - tcf_block_put(q->link.block); - -put_rtab: - qdisc_put_rtab(q->link.R_tab); - return err; -} - -static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - - if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_lssopt opt; - - opt.flags = 0; - if (cl->borrow == NULL) - opt.flags |= TCF_CBQ_LSS_BOUNDED; - if (cl->share == NULL) - opt.flags |= TCF_CBQ_LSS_ISOLATED; - opt.ewma_log = cl->ewma_log; - opt.level = cl->level; - opt.avpkt = cl->avpkt; - opt.maxidle = cl->maxidle; - opt.minidle = (u32)(-cl->minidle); - opt.offtime = cl->offtime; - opt.change = ~0; - if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_wrropt opt; - - memset(&opt, 0, sizeof(opt)); - opt.flags = 0; - opt.allot = cl->allot; - opt.priority = cl->priority + 1; - opt.cpriority = cl->cpriority + 1; - opt.weight = cl->weight; - if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_fopt opt; - - if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->common.classid : 0; - opt.defmap = cl->defmap; - opt.defchange = ~0; - if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) -{ - if (cbq_dump_lss(skb, cl) < 0 || - cbq_dump_rate(skb, cl) < 0 || - cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_fopt(skb, cl) < 0) - return -1; - return 0; -} - -static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, &q->link) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - q->link.xstats.avgidle = q->link.avgidle; - return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); -} - -static int -cbq_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - struct nlattr *nest; - - if (cl->tparent) - tcm->tcm_parent = cl->tparent->common.classid; - else - tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->common.classid; - tcm->tcm_info = cl->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, cl) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - __u32 qlen; - - cl->xstats.avgidle = cl->avgidle; - cl->xstats.undertime = 0; - qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); - - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; - - return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); -} - -static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old, struct netlink_ext_ack *extack) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack); - if (new == NULL) - return -ENOBUFS; - } - - *old = qdisc_replace(sch, new, &cl->q); - return 0; -} - -static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - return cl->q; -} - -static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cbq_deactivate_class(cl); -} - -static unsigned long cbq_find(struct Qdisc *sch, u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - return (unsigned long)cbq_class_lookup(q, classid); -} - -static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - WARN_ON(cl->filters); - - tcf_block_put(cl->block); - qdisc_put(cl->q); - qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->rate_est); - if (cl != &q->link) - kfree(cl); -} - -static void cbq_destroy(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *next; - struct cbq_class *cl; - unsigned int h; - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = NULL; -#endif - /* - * Filters must be destroyed first because we don't destroy the - * classes from root to leafs which means that filters can still - * be bound to classes which have been destroyed already. --TGR '04 - */ - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - tcf_block_put(cl->block); - cl->block = NULL; - } - } - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], - common.hnode) - cbq_destroy_class(sch, cl); - } - qdisc_class_hash_destroy(&q->clhash); -} - -static int -cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg, struct netlink_ext_ack *extack) -{ - int err; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)*arg; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct cbq_class *parent; - struct qdisc_rate_table *rtab = NULL; - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { - NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); - return -EOPNOTSUPP; - } - - if (cl) { - /* Check parent */ - if (parentid) { - if (cl->tparent && - cl->tparent->common.classid != parentid) { - NL_SET_ERR_MSG(extack, "Invalid parent id"); - return -EINVAL; - } - if (!cl->tparent && parentid != TC_H_ROOT) { - NL_SET_ERR_MSG(extack, "Parent must be root"); - return -EINVAL; - } - } - - if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - tb[TCA_CBQ_RTAB], extack); - if (rtab == NULL) - return -EINVAL; - } - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, - true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); - qdisc_put_rtab(rtab); - return err; - } - } - - /* Change class parameters */ - sch_tree_lock(sch); - - if (cl->next_alive != NULL) - cbq_deactivate_class(cl); - - if (rtab) { - qdisc_put_rtab(cl->R_tab); - cl->R_tab = rtab; - } - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - - if (tb[TCA_CBQ_WRROPT]) { - cbq_rmprio(q, cl); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - } - - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - - if (cl->q->q.qlen) - cbq_activate_class(cl); - - sch_tree_unlock(sch); - - return 0; - } - - if (parentid == TC_H_ROOT) - return -EINVAL; - - if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { - NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); - return -EINVAL; - } - - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], - extack); - if (rtab == NULL) - return -EINVAL; - - if (classid) { - err = -EINVAL; - if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) { - NL_SET_ERR_MSG(extack, "Specified class not found"); - goto failure; - } - } else { - int i; - classid = TC_H_MAKE(sch->handle, 0x8000); - - for (i = 0; i < 0x8000; i++) { - if (++q->hgenerator >= 0x8000) - q->hgenerator = 1; - if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) - break; - } - err = -ENOSR; - if (i >= 0x8000) { - NL_SET_ERR_MSG(extack, "Unable to generate classid"); - goto failure; - } - classid = classid|q->hgenerator; - } - - parent = &q->link; - if (parentid) { - parent = cbq_class_lookup(q, parentid); - err = -EINVAL; - if (!parent) { - NL_SET_ERR_MSG(extack, "Failed to find parentid"); - goto failure; - } - } - - err = -ENOBUFS; - cl = kzalloc(sizeof(*cl), GFP_KERNEL); - if (cl == NULL) - goto failure; - - gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); - goto failure; - } - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } - } - - cl->R_tab = rtab; - rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - NULL); - if (!cl->q) - cl->q = &noop_qdisc; - else - qdisc_hash_add(cl->q, true); - - cl->common.classid = classid; - cl->tparent = parent; - cl->qdisc = sch; - cl->allot = parent->allot; - cl->quantum = cl->allot; - cl->weight = cl->R_tab->rate.rate; - - sch_tree_lock(sch); - cbq_link_class(cl); - cl->borrow = cl->tparent; - if (cl->tparent != &q->link) - cl->share = cl->tparent; - cbq_adjust_levels(parent); - cl->minidle = -0x7FFFFFFF; - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log == 0) - cl->ewma_log = q->link.ewma_log; - if (cl->maxidle == 0) - cl->maxidle = q->link.maxidle; - if (cl->avpkt == 0) - cl->avpkt = q->link.avpkt; - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); - - *arg = (unsigned long)cl; - return 0; - -failure: - qdisc_put_rtab(rtab); - return err; -} - -static int cbq_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl->filters || cl->children || cl == &q->link) - return -EBUSY; - - sch_tree_lock(sch); - - qdisc_purge_queue(cl->q); - - if (cl->next_alive) - cbq_deactivate_class(cl); - - if (q->tx_borrowed == cl) - q->tx_borrowed = q->tx_class; - if (q->tx_class == cl) { - q->tx_class = NULL; - q->tx_borrowed = NULL; - } -#ifdef CONFIG_NET_CLS_ACT - if (q->rx_class == cl) - q->rx_class = NULL; -#endif - - cbq_unlink_class(cl); - cbq_adjust_levels(cl->tparent); - cl->defmap = 0; - cbq_sync_defmap(cl); - - cbq_rmprio(q, cl); - sch_tree_unlock(sch); - - cbq_destroy_class(sch, cl); - return 0; -} - -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl == NULL) - cl = &q->link; - - return cl->block; -} - -static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class *)parent; - struct cbq_class *cl = cbq_class_lookup(q, classid); - - if (cl) { - if (p && p->level <= cl->level) - return 0; - cl->filters++; - return (unsigned long)cl; - } - return 0; -} - -static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cl->filters--; -} - -static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - unsigned int h; - - if (arg->stop) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) - return; - } - } -} - -static const struct Qdisc_class_ops cbq_class_ops = { - .graft = cbq_graft, - .leaf = cbq_leaf, - .qlen_notify = cbq_qlen_notify, - .find = cbq_find, - .change = cbq_change_class, - .delete = cbq_delete, - .walk = cbq_walk, - .tcf_block = cbq_tcf_block, - .bind_tcf = cbq_bind_filter, - .unbind_tcf = cbq_unbind_filter, - .dump = cbq_dump_class, - .dump_stats = cbq_dump_class_stats, -}; - -static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &cbq_class_ops, - .id = "cbq", - .priv_size = sizeof(struct cbq_sched_data), - .enqueue = cbq_enqueue, - .dequeue = cbq_dequeue, - .peek = qdisc_peek_dequeued, - .init = cbq_init, - .reset = cbq_reset, - .destroy = cbq_destroy, - .change = NULL, - .dump = cbq_dump, - .dump_stats = cbq_dump_stats, - .owner = THIS_MODULE, -}; - -static int __init cbq_module_init(void) -{ - return register_qdisc(&cbq_qdisc_ops); -} -static void __exit cbq_module_exit(void) -{ - unregister_qdisc(&cbq_qdisc_ops); -} -module_init(cbq_module_init) -module_exit(cbq_module_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c deleted file mode 100644 index 401ffaf87..000000000 --- a/net/sched/sch_dsmark.c +++ /dev/null @@ -1,518 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_dsmark.c - Differentiated Services field marker */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/bitops.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> -#include <asm/byteorder.h> - -/* - * classid class marking - * ------- ----- ------- - * n/a 0 n/a - * x:0 1 use entry [0] - * ... ... ... - * x:y y>0 y+1 use entry [y] - * ... ... ... - * x:indices-1 indices use entry [indices-1] - * ... ... ... - * x:y y+1 use entry [y & (indices-1)] - * ... ... ... - * 0xffff 0x10000 use entry [indices-1] - */ - - -#define NO_DEFAULT_INDEX (1 << 16) - -struct mask_value { - u8 mask; - u8 value; -}; - -struct dsmark_qdisc_data { - struct Qdisc *q; - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct mask_value *mv; - u16 indices; - u8 set_tc_index; - u32 default_index; /* index range is 0...0xffff */ -#define DSMARK_EMBEDDED_SZ 16 - struct mask_value embedded[DSMARK_EMBEDDED_SZ]; -}; - -static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) -{ - return index <= p->indices && index > 0; -} - -/* ------------------------- Class/flow operations ------------------------- */ - -static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", - __func__, sch, p, new, old); - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (new == NULL) - new = &noop_qdisc; - } - - *old = qdisc_replace(sch, new, &p->q); - return 0; -} - -static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - return p->q; -} - -static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) -{ - return TC_H_MIN(classid) + 1; -} - -static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - - return dsmark_find(sch, classid); -} - -static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) -{ -} - -static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { - [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, - [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, - [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, - [TCA_DSMARK_MASK] = { .type = NLA_U8 }, - [TCA_DSMARK_VALUE] = { .type = NLA_U8 }, -}; - -static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - - pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", - __func__, sch, p, classid, parent, *arg); - - if (!dsmark_valid_index(p, *arg)) { - err = -ENOENT; - goto errout; - } - - if (!opt) - goto errout; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - if (tb[TCA_DSMARK_VALUE]) - p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); - - if (tb[TCA_DSMARK_MASK]) - p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - - err = 0; - -errout: - return err; -} - -static int dsmark_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - if (!dsmark_valid_index(p, arg)) - return -EINVAL; - - p->mv[arg - 1].mask = 0xff; - p->mv[arg - 1].value = 0; - - return 0; -} - -static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int i; - - pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", - __func__, sch, p, walker); - - if (walker->stop) - return; - - for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) { - walker->count++; - continue; - } - if (!tc_qdisc_stats_dump(sch, i + 1, walker)) - break; - } -} - -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - return p->block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - unsigned int len = qdisc_pkt_len(skb); - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); - - if (p->set_tc_index) { - int wlen = skb_network_offset(skb); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) - & ~INET_ECN_MASK; - break; - - case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) - & ~INET_ECN_MASK; - break; - default: - skb->tc_index = 0; - break; - } - } - - if (TC_H_MAJ(skb->priority) == sch->handle) - skb->tc_index = TC_H_MIN(skb->priority); - else { - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, NULL, fl, &res, false); - - pr_debug("result %d class 0x%04x\n", result, res.classid); - - switch (result) { -#ifdef CONFIG_NET_CLS_ACT - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - - case TC_ACT_SHOT: - goto drop; -#endif - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; - } - } - - err = qdisc_enqueue(skb, p->q, to_free); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - qdisc_qstats_drop(sch); - return err; - } - - sch->qstats.backlog += len; - sch->q.qlen++; - - return NET_XMIT_SUCCESS; - -drop: - qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - u32 index; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - skb = qdisc_dequeue_peeked(p->q); - if (skb == NULL) - return NULL; - - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - index = skb->tc_index & (p->indices - 1); - pr_debug("index %d->%d\n", skb->tc_index, index); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - default: - /* - * Only complain if a change was actually attempted. - * This way, we can send non-IP traffic through dsmark - * and don't need yet another qdisc as a bypass. - */ - if (p->mv[index].mask != 0xff || p->mv[index].value) - pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb_protocol(skb, true))); - break; - } - - return skb; -} - -static struct sk_buff *dsmark_peek(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - return p->q->ops->peek(p->q); -} - -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - u32 default_index = NO_DEFAULT_INDEX; - u16 indices; - int i; - - pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); - - if (!opt) - goto errout; - - err = tcf_block_get(&p->block, &p->filter_list, sch, extack); - if (err) - return err; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - err = -EINVAL; - if (!tb[TCA_DSMARK_INDICES]) - goto errout; - indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); - - if (hweight32(indices) != 1) - goto errout; - - if (tb[TCA_DSMARK_DEFAULT_INDEX]) - default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - - if (indices <= DSMARK_EMBEDDED_SZ) - p->mv = p->embedded; - else - p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); - if (!p->mv) { - err = -ENOMEM; - goto errout; - } - for (i = 0; i < indices; i++) { - p->mv[i].mask = 0xff; - p->mv[i].value = 0; - } - p->indices = indices; - p->default_index = default_index; - p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, - NULL); - if (p->q == NULL) - p->q = &noop_qdisc; - else - qdisc_hash_add(p->q, true); - - pr_debug("%s: qdisc %p\n", __func__, p->q); - - err = 0; -errout: - return err; -} - -static void dsmark_reset(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - if (p->q) - qdisc_reset(p->q); -} - -static void dsmark_destroy(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - tcf_block_put(p->block); - qdisc_put(p->q); - if (p->mv != p->embedded) - kfree(p->mv); -} - -static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); - - if (!dsmark_valid_index(p, cl)) - return -EINVAL; - - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); - tcm->tcm_info = p->q->handle; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) - goto nla_put_failure; - - if (p->default_index != NO_DEFAULT_INDEX && - nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) - goto nla_put_failure; - - if (p->set_tc_index && - nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static const struct Qdisc_class_ops dsmark_class_ops = { - .graft = dsmark_graft, - .leaf = dsmark_leaf, - .find = dsmark_find, - .change = dsmark_change, - .delete = dsmark_delete, - .walk = dsmark_walk, - .tcf_block = dsmark_tcf_block, - .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_unbind_filter, - .dump = dsmark_dump_class, -}; - -static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &dsmark_class_ops, - .id = "dsmark", - .priv_size = sizeof(struct dsmark_qdisc_data), - .enqueue = dsmark_enqueue, - .dequeue = dsmark_dequeue, - .peek = dsmark_peek, - .init = dsmark_init, - .reset = dsmark_reset, - .destroy = dsmark_destroy, - .change = NULL, - .dump = dsmark_dump, - .owner = THIS_MODULE, -}; - -static int __init dsmark_module_init(void) -{ - return register_qdisc(&dsmark_qdisc_ops); -} - -static void __exit dsmark_module_exit(void) -{ - unregister_qdisc(&dsmark_qdisc_ops); -} - -module_init(dsmark_module_init) -module_exit(dsmark_module_exit) - -MODULE_LICENSE("GPL"); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 9b8999e2a..867df4522 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid) rc = 0; } } +#if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } +#endif write_unlock(&smc_clc_eid_table.lock); return rc; } @@ -273,22 +275,30 @@ err: int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; +#else + return -EOPNOTSUPP; +#endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); +#else + rc = -EOPNOTSUPP; +#endif return rc; } @@ -1168,7 +1178,11 @@ void __init smc_clc_init(void) INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; +#if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; +#else + smc_clc_eid_table.seid_enabled = 0; +#endif } void smc_clc_exit(void) diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 74ee22712..720d3ba74 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) xprt_switch_find_current_entry_offline); } -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8cc42aea1..2e14d4c37 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include <linux/rtnetlink.h> #include <net/switchdev.h> +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cdcd27318..1cb993562 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1088,6 +1088,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + return -EINVAL; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { diff --git a/net/tls/tls.h b/net/tls/tls.h index 0672acab2..4922668fe 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -97,6 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +void tls_sw_splice_eof(struct socket *sock); int tls_sw_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); int tls_sw_sendpage(struct sock *sk, struct page *page, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 338a443fa..6b7189a52 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -922,6 +922,7 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; @@ -990,6 +991,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; @@ -1096,7 +1098,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0323040d3..2bd27b777 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -62,6 +62,7 @@ struct tls_decrypt_ctx { u8 iv[MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -186,7 +187,6 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) struct aead_request *aead_req = crypto_get_completion_data(data); struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -212,7 +212,7 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -223,10 +223,17 @@ static void tls_decrypt_done(crypto_completion_data_t *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -252,6 +259,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -265,6 +273,8 @@ static int tls_do_decryption(struct sock *sk, return 0; ret = crypto_wait_req(ret, &ctx->async_wait); + } else if (darg->async) { + atomic_dec(&ctx->decrypt_pending); } darg->async = false; @@ -441,7 +451,6 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) struct tls_rec *rec; bool ready = false; struct sock *sk; - int pending; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -481,12 +490,8 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -496,6 +501,15 @@ static void tls_encrypt_done(crypto_completion_data_t *data, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -542,6 +556,7 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); @@ -953,7 +968,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1122,24 +1136,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1158,6 +1160,67 @@ send_end: return copied > 0 ? copied : ret; } +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_rec *rec; + struct sk_msg *msg_pl; + ssize_t copied = 0; + bool retrying = false; + int ret = 0; + + if (!ctx->open_rec) + return; + + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); + +retry: + /* same checks as in tls_sw_push_pending_record() */ + rec = ctx->open_rec; + if (!rec) + goto unlock; + + msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; + + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; + } + + /* Wait for pending encryptions to get completed */ + if (tls_encrypt_async_wait(ctx)) + goto unlock; + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } + +unlock: + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); +} + static int tls_sw_do_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1595,6 +1658,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, @@ -1783,7 +1847,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1796,7 +1861,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1814,12 +1879,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1855,6 +1920,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1954,10 +2023,12 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1977,12 +2048,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2075,6 +2146,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2098,8 +2171,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2123,16 +2198,10 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2144,12 +2213,11 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); - decrypted += max(err, 0); + async_copy_bytes, is_peek, NULL); } copied += decrypted; @@ -2351,16 +2419,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2513,6 +2574,48 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2534,48 +2637,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index be2ed7b0f..e1af94393 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1322,13 +1322,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1559,7 +1557,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index 616b55c5b..3438b7af0 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dc2763540..ab2c83d58 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 8809e668e..3fcddc868 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1671,6 +1671,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 70fb14b8b..1a3bd554e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3960,6 +3960,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; } + if_start = 0; wp_idx++; } out: @@ -4136,6 +4137,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; + if (otype != NL80211_IFTYPE_MESH_POINT) + return -EINVAL; if (netif_running(dev)) return -EBUSY; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b7e1631b3..3ad4c1032 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1810,8 +1810,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, list_add(&new->hidden_list, &hidden->hidden_list); hidden->refcount++; + + ies = (void *)rcu_access_pointer(new->pub.beacon_ies); rcu_assign_pointer(new->pub.beacon_ies, hidden->pub.beacon_ies); + if (ies) + kfree_rcu(ies, rcu_head); } } else { /* diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac1a645af..d0320e35a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int optlen = 0; int err = -EINVAL; + skb->protocol = htons(ETH_P_IP); + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { struct ip_beet_phdr *ph; int phlen; @@ -232,8 +234,7 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; + skb->protocol = htons(ETH_P_IP); if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -270,8 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; + skb->protocol = htons(ETH_P_IPV6); + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -301,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) int size = sizeof(struct ipv6hdr); int err; + skb->protocol = htons(ETH_P_IPV6); + err = skb_cow_head(skb, size + skb->mac_len); if (err) goto out; @@ -332,22 +335,26 @@ out: */ static int xfrm_inner_mode_encap_remove(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: - if (inner_mode->family == AF_INET) + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - if (inner_mode->family == AF_INET6) + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); + } break; case XFRM_MODE_TUNNEL: - if (inner_mode->family == AF_INET) + switch (XFRM_MODE_SKB_CB(skb)->protocol) { + case IPPROTO_IPIP: return xfrm4_remove_tunnel_encap(x, skb); - if (inner_mode->family == AF_INET6) + case IPPROTO_IPV6: return xfrm6_remove_tunnel_encap(x, skb); break; + } + return -EINVAL; } WARN_ON_ONCE(1); @@ -356,9 +363,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode = &x->inner_mode; - - switch (x->outer_mode.family) { + switch (x->props.family) { case AF_INET: xfrm4_extract_header(skb); break; @@ -370,25 +375,7 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) return -EAFNOSUPPORT; } - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (!inner_mode) - return -EAFNOSUPPORT; - } - - switch (inner_mode->family) { - case AF_INET: - skb->protocol = htons(ETH_P_IP); - break; - case AF_INET6: - skb->protocol = htons(ETH_P_IPV6); - break; - default: - WARN_ON_ONCE(1); - break; - } - - return xfrm_inner_mode_encap_remove(x, inner_mode, skb); + return xfrm_inner_mode_encap_remove(x, skb); } /* Remove encapsulation header. @@ -434,17 +421,16 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } static int xfrm_inner_mode_input(struct xfrm_state *x, - const struct xfrm_mode *inner_mode, struct sk_buff *skb) { - switch (inner_mode->encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: return xfrm_prepare_input(x, skb); case XFRM_MODE_TRANSPORT: - if (inner_mode->family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_input(x, skb); - if (inner_mode->family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_input(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: @@ -462,7 +448,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); - const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; @@ -492,7 +477,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - family = x->outer_mode.family; + family = x->props.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -676,17 +661,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = &x->inner_mode; - - if (x->sel.family == AF_UNSPEC) { - inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - } - - if (xfrm_inner_mode_input(x, inner_mode, skb)) { + if (xfrm_inner_mode_input(x, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -701,7 +676,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode.family; + family = x->props.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -732,7 +707,7 @@ resume: err = -EAFNOSUPPORT; rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + afinfo = xfrm_state_afinfo_get_rcu(x->props.family); if (likely(afinfo)) err = afinfo->transport_finish(skb, xfrm_gro || async); rcu_read_unlock(); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9a5e79a38..07a7ee43b 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -414,7 +414,7 @@ static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; skb->protocol = htons(ETH_P_IP); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm4_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -437,7 +437,7 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) skb->ignore_df = 1; skb->protocol = htons(ETH_P_IPV6); - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: return xfrm6_beet_encap_add(x, skb); case XFRM_MODE_TUNNEL: @@ -453,22 +453,22 @@ static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) { - switch (x->outer_mode.encap) { + switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TUNNEL: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_prepare_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_prepare_output(x, skb); break; case XFRM_MODE_TRANSPORT: - if (x->outer_mode.family == AF_INET) + if (x->props.family == AF_INET) return xfrm4_transport_output(x, skb); - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_transport_output(x, skb); break; case XFRM_MODE_ROUTEOPTIMIZATION: - if (x->outer_mode.family == AF_INET6) + if (x->props.family == AF_INET6) return xfrm6_ro_output(x, skb); WARN_ON_ONCE(1); break; @@ -866,21 +866,10 @@ static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_mode *inner_mode; - - if (x->sel.family == AF_UNSPEC) - inner_mode = xfrm_ip2inner_mode(x, - xfrm_af2proto(skb_dst(skb)->ops->family)); - else - inner_mode = &x->inner_mode; - - if (inner_mode == NULL) - return -EAFNOSUPPORT; - - switch (inner_mode->family) { - case AF_INET: + switch (skb->protocol) { + case htons(ETH_P_IP): return xfrm4_extract_output(x, skb); - case AF_INET6: + case htons(ETH_P_IPV6): return xfrm6_extract_output(x, skb); } diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb359..634e81d83 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include <linux/types.h> -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index e41dee64d..39aea753d 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -44,6 +44,7 @@ modpost-args = \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_NSDEPS),-d $(MODULES_NSDEPS)) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ + $(if $(findstring 1, $(KBUILD_EXTRA_WARN)),-W) \ -o $@ # 'make -i -k' ignores compile errors, and builds as many modules as possible. diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5c389df6..4de98b7bb 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -495,7 +495,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\ () system call, and this diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1e5e66ae5..ecf4250b0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4971,7 +4971,7 @@ sub process { if|for|while|switch|return|case| volatile|__volatile__| __attribute__|format|__extension__| - asm|__asm__)$/x) + asm|__asm__|scoped_guard)$/x) { # cpp #define statements have non-optional spaces, ie # if there is a space between the name and the open diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32e573943..458b2948b 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e6be7fc26..686eed37f 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -41,6 +41,8 @@ static bool allow_missing_ns_imports; static bool error_occurred; +static bool extra_warn; + /* * Cut off the warnings when there are too many. This typically occurs when * vmlinux is missing. ('make modules' without building vmlinux.) @@ -809,7 +811,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define ALL_INIT_TEXT_SECTIONS \ ".init.text", ".meminit.text" #define ALL_EXIT_TEXT_SECTIONS \ - ".exit.text", ".memexit.text" + ".exit.text" #define ALL_PCI_INIT_SECTIONS \ ".pci_fixup_early", ".pci_fixup_header", ".pci_fixup_final", \ @@ -817,23 +819,22 @@ static void check_section(const char *modname, struct elf_info *elf, ".pci_fixup_resume_early", ".pci_fixup_suspend" #define ALL_XXXINIT_SECTIONS MEM_INIT_SECTIONS -#define ALL_XXXEXIT_SECTIONS MEM_EXIT_SECTIONS #define ALL_INIT_SECTIONS INIT_SECTIONS, ALL_XXXINIT_SECTIONS -#define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS +#define ALL_EXIT_SECTIONS EXIT_SECTIONS #define DATA_SECTIONS ".data", ".data.rel" -#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" +#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" #define EXIT_SECTIONS ".exit.*" -#define MEM_EXIT_SECTIONS ".memexit.*" #define ALL_TEXT_SECTIONS ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \ TEXT_SECTIONS, OTHER_TEXT_SECTIONS @@ -862,7 +863,6 @@ enum mismatch { TEXT_TO_ANY_EXIT, DATA_TO_ANY_EXIT, XXXINIT_TO_SOME_INIT, - XXXEXIT_TO_SOME_EXIT, ANY_INIT_TO_ANY_EXIT, ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, @@ -937,12 +937,6 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { INIT_SECTIONS, NULL }, .mismatch = XXXINIT_TO_SOME_INIT, }, -/* Do not reference exit code/data from memexit code/data */ -{ - .fromsec = { ALL_XXXEXIT_SECTIONS, NULL }, - .bad_tosec = { EXIT_SECTIONS, NULL }, - .mismatch = XXXEXIT_TO_SOME_EXIT, -}, /* Do not use exit code/data from init code */ { .fromsec = { ALL_INIT_SECTIONS, NULL }, @@ -1085,9 +1079,20 @@ static int secref_whitelist(const struct sectioncheck *mismatch, "*_console"))) return 0; - /* symbols in data sections that may refer to meminit/exit sections */ + /* symbols in data sections that may refer to meminit sections */ if (match(fromsec, PATTERNS(DATA_SECTIONS)) && - match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS, ALL_EXIT_SECTIONS)) && + match(tosec, PATTERNS(ALL_XXXINIT_SECTIONS)) && + match(fromsym, PATTERNS("*driver"))) + return 0; + + /* + * symbols in data sections must not refer to .exit.*, but there are + * quite a few offenders, so hide these unless for W=1 builds until + * these are fixed. + */ + if (!extra_warn && + match(fromsec, PATTERNS(DATA_SECTIONS)) && + match(tosec, PATTERNS(EXIT_SECTIONS)) && match(fromsym, PATTERNS("*driver"))) return 0; @@ -1254,7 +1259,6 @@ static void report_sec_mismatch(const char *modname, case TEXT_TO_ANY_EXIT: case DATA_TO_ANY_EXIT: case XXXINIT_TO_SOME_INIT: - case XXXEXIT_TO_SOME_EXIT: case ANY_INIT_TO_ANY_EXIT: case ANY_EXIT_TO_ANY_INIT: warn("%s: section mismatch in reference: %s (section: %s) -> %s (section: %s)\n", @@ -2290,7 +2294,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:mnT:o:awENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:mnT:o:aWwENd:")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2315,6 +2319,9 @@ int main(int argc, char **argv) case 'T': files_source = optarg; break; + case 'W': + extra_warn = true; + break; case 'w': warn_unresolved = true; break; diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 6bf9caca0..a72e6cf61 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index f42359f58..d468c8b90 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -226,7 +226,7 @@ void aa_policy_destroy(struct aa_policy *policy); */ #define fn_label_build(L, P, GFP, FN) \ ({ \ - __label__ __cleanup, __done; \ + __label__ __do_cleanup, __done; \ struct aa_label *__new_; \ \ if ((L)->size > 1) { \ @@ -244,7 +244,7 @@ void aa_policy_destroy(struct aa_policy *policy); __new_ = (FN); \ AA_BUG(!__new_); \ if (IS_ERR(__new_)) \ - goto __cleanup; \ + goto __do_cleanup; \ __lvec[__j++] = __new_; \ } \ for (__j = __count = 0; __j < (L)->size; __j++) \ @@ -266,7 +266,7 @@ void aa_policy_destroy(struct aa_policy *policy); vec_cleanup(profile, __pvec, __count); \ } else \ __new_ = NULL; \ -__cleanup: \ +__do_cleanup: \ vec_cleanup(label, __lvec, (L)->size); \ } else { \ (P) = labels_profile(L); \ diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 64ed76654..d328965f3 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -824,8 +824,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; diff --git a/security/security.c b/security/security.c index fc15b963e..1b504c296 100644 --- a/security/security.c +++ b/security/security.c @@ -2186,7 +2186,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index f4cd9b58b..a7af08555 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2648,13 +2648,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ diff --git a/sound/core/Makefile b/sound/core/Makefile index 2762f03d9..a7a1590b2 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -30,7 +30,6 @@ snd-ctl-led-objs := control_led.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 9be2260e4..f8b644cb9 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -934,7 +934,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s, // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index 741a5d17a..ecd3aec57 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -629,17 +629,15 @@ void snd_hdac_stream_timecounter_init(struct hdac_stream *azx_dev, struct hdac_stream *s; bool inited = false; u64 cycle_last = 0; - int i = 0; list_for_each_entry(s, &bus->stream_list, list) { - if (streams & (1 << i)) { + if ((streams & (1 << s->index))) { azx_timecounter_init(s, inited, cycle_last); if (!inited) { inited = true; cycle_last = s->tc.cycle_last; } } - i++; } snd_pcm_gettime(runtime, &runtime->trigger_tstamp); @@ -684,14 +682,13 @@ void snd_hdac_stream_sync(struct hdac_stream *azx_dev, bool start, unsigned int streams) { struct hdac_bus *bus = azx_dev->bus; - int i, nwait, timeout; + int nwait, timeout; struct hdac_stream *s; for (timeout = 5000; timeout; timeout--) { nwait = 0; - i = 0; list_for_each_entry(s, &bus->stream_list, list) { - if (!(streams & (1 << i++))) + if (!(streams & (1 << s->index))) continue; if (start) { diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5aaf3dcec..a26f2a2d4 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2549,6 +2549,8 @@ static const struct pci_device_id azx_ids[] = { /* Lunarlake-P */ { PCI_DEVICE(0x8086, 0xa828), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Arrow Lake */ + { PCI_DEVICE_DATA(INTEL, HDA_ARL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE) }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON }, diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a889cccdd..e8209178d 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,6 +21,12 @@ #include "hda_jack.h" #include "hda_generic.h" +enum { + CX_HEADSET_NOPRESENT = 0, + CX_HEADSET_PARTPRESENT, + CX_HEADSET_ALLPRESENT, +}; + struct conexant_spec { struct hda_gen_spec gen; @@ -42,7 +48,8 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - + unsigned int headset_present_flag; + bool is_cx8070_sn6140; }; @@ -164,6 +171,27 @@ static void cxt_init_gpio_led(struct hda_codec *codec) } } +static void cx_fixup_headset_recog(struct hda_codec *codec) +{ + unsigned int mic_persent; + + /* fix some headset type recognize fail issue, such as EDIFIER headset */ + /* set micbiasd output current comparator threshold from 66% to 55%. */ + snd_hda_codec_write(codec, 0x1c, 0, 0x320, 0x010); + /* set OFF voltage for DFET from -1.2V to -0.8V, set headset micbias registor + * value adjustment trim from 2.2K ohms to 2.0K ohms. + */ + snd_hda_codec_write(codec, 0x1c, 0, 0x3b0, 0xe10); + /* fix reboot headset type recognize fail issue */ + mic_persent = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (mic_persent & AC_PINSENSE_PRESENCE) + /* enable headset mic VREF */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + else + /* disable headset mic VREF */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); +} + static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; @@ -174,6 +202,9 @@ static int cx_auto_init(struct hda_codec *codec) cxt_init_gpio_led(codec); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); + if (spec->is_cx8070_sn6140) + cx_fixup_headset_recog(codec); + return 0; } @@ -192,6 +223,77 @@ static void cx_auto_free(struct hda_codec *codec) snd_hda_gen_free(codec); } +static void cx_process_headset_plugin(struct hda_codec *codec) +{ + unsigned int val; + unsigned int count = 0; + + /* Wait headset detect done. */ + do { + val = snd_hda_codec_read(codec, 0x1c, 0, 0xca0, 0x0); + if (val & 0x080) { + codec_dbg(codec, "headset type detect done!\n"); + break; + } + msleep(20); + count++; + } while (count < 3); + val = snd_hda_codec_read(codec, 0x1c, 0, 0xcb0, 0x0); + if (val & 0x800) { + codec_dbg(codec, "headset plugin, type is CTIA\n"); + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + } else if (val & 0x400) { + codec_dbg(codec, "headset plugin, type is OMTP\n"); + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24); + } else { + codec_dbg(codec, "headphone plugin\n"); + } +} + +static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +{ + unsigned int phone_present, mic_persent, phone_tag, mic_tag; + struct conexant_spec *spec = codec->spec; + + /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, + * the node 19 can only be config to microphone or disabled. + * Check hp&mic tag to process headset pulgin&plugout. + */ + phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); + mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); + if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || + (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { + phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ + spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + return; + } + if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { + spec->headset_present_flag = CX_HEADSET_PARTPRESENT; + } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { + mic_persent = snd_hda_codec_read(codec, 0x19, 0, + AC_VERB_GET_PIN_SENSE, 0x0); + /* headset is present */ + if ((phone_present & AC_PINSENSE_PRESENCE) && + (mic_persent & AC_PINSENSE_PRESENCE)) { + cx_process_headset_plugin(codec); + spec->headset_present_flag = CX_HEADSET_ALLPRESENT; + } + } + } +} + +static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) +{ + struct conexant_spec *spec = codec->spec; + + if (spec->is_cx8070_sn6140) + cx_update_headset_mic_vref(codec, res); + + snd_hda_jack_unsol_event(codec, res); +} + #ifdef CONFIG_PM static int cx_auto_suspend(struct hda_codec *codec) { @@ -205,7 +307,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = snd_hda_jack_unsol_event, + .unsol_event = cx_jack_unsol_event, #ifdef CONFIG_PM .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, @@ -242,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -739,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -894,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -967,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1007,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; @@ -1042,6 +1162,15 @@ static int patch_conexant_auto(struct hda_codec *codec) codec->spec = spec; codec->patch_ops = cx_auto_patch_ops; + /* init cx8070/sn6140 flag and reset headset_present_flag */ + switch (codec->core.vendor_id) { + case 0x14f11f86: + case 0x14f11f87: + spec->is_cx8070_sn6140 = true; + spec->headset_present_flag = CX_HEADSET_NOPRESENT; + break; + } + cx_auto_parse_eapd(codec); spec->gen.own_eapd_ctl = 1; diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959..e41316e2e 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 62f213704..75bd7b2fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9377,7 +9377,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC245_FIXUP_HP_MUTE_LED_COEFBIT] = { .type = HDA_FIXUP_FUNC, @@ -9392,6 +9392,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, @@ -9431,6 +9433,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), @@ -9617,6 +9620,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -9658,6 +9662,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9683,16 +9688,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), @@ -9722,6 +9731,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), @@ -10049,6 +10060,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 808d00282..28da4e185 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -245,6 +245,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82UG"), } }, @@ -300,6 +307,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Razer"), DMI_MATCH(DMI_PRODUCT_NAME, "Blade 14 (2022) - RZ09-0427"), } diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 2ccc68513..bb9de5767 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1581,7 +1581,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); if (w->shift == WSA_MACRO_COMP1) { @@ -1620,10 +1619,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_ENABLE); - offset_val = -2; } val = snd_soc_component_read(component, gain_reg); - val += offset_val; snd_soc_component_write(component, gain_reg, val); wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); @@ -1651,10 +1648,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_DISABLE); - offset_val = 2; - val = snd_soc_component_read(component, gain_reg); - val += offset_val; - snd_soc_component_write(component, gain_reg, val); } wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index fd3dca084..844d14d4c 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3288,6 +3288,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); + mutex_unlock(&rt5645->jd_mutex); return; case 4: val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020; diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a2abd1a11..555b74e71 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { @@ -3588,7 +3588,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 2cfca78f0..47a4c3632 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -740,19 +740,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -842,29 +848,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index b152f4e5c..cd96c35a1 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1102,7 +1102,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300); +static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain, + 0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0), + 15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0), + 30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0), +); static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index bcceebca9..484b0e7c2 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -578,6 +578,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e6..a676ad093 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -328,8 +328,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f5..3b45d0ee7 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 33380cad3..b8a474a2e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2029,10 +2029,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M), + DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ + QUIRK_FLAG_IFACE_SKIP_CLOSE), DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */ QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ @@ -2071,14 +2075,22 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ @@ -2111,6 +2123,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */ @@ -2153,6 +2169,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ @@ -2161,22 +2183,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), - DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ - QUIRK_FLAG_IFACE_SKIP_CLOSE), - DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ - QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff97524..e2ff22b37 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index eb6303ff4..4cfcef9da 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) +#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.1.1 is required" +#error "OpenCSD >= 1.2.1 is required" #endif int main(void) diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bd..d09f9dc17 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include <linux/compiler-gcc.h> #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 201dc77eb..d5d218373 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3109,6 +3109,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -3117,6 +3121,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6687,6 +6696,8 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6699,6 +6710,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6733,6 +6745,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -6747,9 +6762,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 10f15a3e3..e2014b125 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4157,6 +4157,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index bf02d62a3..42f57b640 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -50,11 +50,21 @@ void uniq(struct cmdnames *cmds) if (!cmds->cnt) return; - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - + for (i = 1; i < cmds->cnt; i++) { + if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + zfree(&cmds->names[i - 1]); + } + for (i = 0, j = 0; i < cmds->cnt; i++) { + if (cmds->names[i]) { + if (i == j) + j++; + else + cmds->names[j++] = cmds->names[i]; + } + } cmds->cnt = j; + while (j < i) + cmds->names[j++] = NULL; } void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e914cc45b..6f00bee91 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -467,10 +467,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index d711f4bea..47cb753ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5211,6 +5211,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3..42c4a8b62 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858..696ef9bf3 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -46,6 +46,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 7bf56ea16..616d35814 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test" + max_erp_entries_test max_group_size_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -1033,6 +1033,60 @@ max_erp_entries_test() "max chain $chain_failed, mask $mask_failed" } +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. + + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). max size $max_size" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882c..b5e3a3aad 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6ef..ec40a33c2 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void) } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { - /* We only have one vcpu */ - static uint32_t fetch_index = 0; uint32_t count = 0, cleared; bool continued_vcpu = false; @@ -349,11 +350,15 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), - slot, bitmap, num_pages, &fetch_index); + slot, bitmap, num_pages, + ring_buf_idx); cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -392,12 +397,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -406,10 +405,10 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages); + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -428,7 +427,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -471,13 +469,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages) + void *bitmap, uint32_t num_pages, + uint32_t *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) @@ -488,14 +487,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -696,6 +687,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; + uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -767,10 +760,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); @@ -778,7 +782,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages); + bmap, host_num_pages, + &ring_buf_idx); /* * See vcpu_sync_stop_requested definition for details on why @@ -796,15 +801,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 330d0b1ce..c921750ca 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -91,7 +91,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "TCLASS $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null @@ -128,7 +128,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "HOPLIMIT $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index bd89198cd..ec097f245 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -13,6 +13,7 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44..4f80014ca 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2107579e2..a20dca9d2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -144,6 +144,11 @@ check_tools() exit $ksft_skip fi + if ! ss -h | grep -q MPTCP; then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip + fi + # Use the legacy version if available to support old kernel versions if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy" diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf0..abc5648b5 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 4a417f9d5..06ad05104 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -301,10 +301,10 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 0d705fdcf..00ab4c6e4 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -713,23 +713,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { @@ -1928,6 +1928,13 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" @@ -1958,11 +1965,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1003ddf7b..227fd1076 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -8,7 +8,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565ad..9690a5d7f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -37,6 +37,10 @@ create_ns() { for ns in $NS_SRC $NS_DST; do ip netns add $ns ip -n $ns link set dev lo up + + # disable route solicitations to decrease 'noise' traffic + ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0 done ip link add name veth$SRC type veth peer name veth$DST @@ -78,6 +82,12 @@ create_vxlan_pair() { create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad done + + # preload neighbur cache, do avoid some noisy traffic + local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]') + local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]') + ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC + ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST } is_ipv6() { @@ -117,7 +127,7 @@ run_test() { # not enable GRO ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 - ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & + ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args & local spid=$! sleep 0.1 ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst @@ -166,7 +176,7 @@ run_bench() { # bind the sender and the receiver to different CPUs to try # get reproducible results ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" - ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 & local spid=$! sleep 0.1 ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index f35a924d4..1cbadd267 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -375,7 +375,7 @@ static void do_recv(void) do_flush_udp(fd); tnow = gettimeofday_ms(); - if (tnow > treport) { + if (!cfg_expected_pkt_nr && tnow > treport) { if (packets) fprintf(stderr, "%s rx: %6lu MB/s %8lu calls/s\n", diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index a1ec64f7d..108bc11d1 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -34,8 +34,4 @@ SECTIONS } } -ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") +ASSERT(!DEFINED(_GLOBAL_OFFSET_TABLE_), "Libcalls through GOT are not supported in enclaves") diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json deleted file mode 100644 index f5bc8670a..000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json +++ /dev/null @@ -1,94 +0,0 @@ -[ - { - "id": "7628", - "name": "Create ATM with default setting", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "390a", - "name": "Delete ATM with valid handle", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root atm" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "32a0", - "name": "Show ATM class", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class atm 1: parent 1:", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6310", - "name": "Dump ATM stats", - "category": [ - "qdisc", - "atm" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm", - "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc atm 1: root refcnt", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json deleted file mode 100644 index 1ab21c83a..000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json +++ /dev/null @@ -1,184 +0,0 @@ -[ - { - "id": "3460", - "name": "Create CBQ with default setting", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "0592", - "name": "Create CBQ with mpu", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4684", - "name": "Create CBQ with valid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4345", - "name": "Create CBQ with invalid cell num", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4525", - "name": "Create CBQ with valid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "6784", - "name": "Create CBQ with invalid ewma", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", - "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5468", - "name": "Delete CBQ with handle", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "492a", - "name": "Show CBQ class", - "category": [ - "qdisc", - "cbq" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json deleted file mode 100644 index c030795f9..000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json +++ /dev/null @@ -1,140 +0,0 @@ -[ - { - "id": "6345", - "name": "Create DSMARK with default setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "3462", - "name": "Create DSMARK with default_index setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "ca95", - "name": "Create DSMARK with set_tc_index flag", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "a950", - "name": "Create DSMARK with multiple setting", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "4092", - "name": "Delete DSMARK with handle", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true", - "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" - ], - "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", - "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", - "matchCount": "0", - "teardown": [ - "$IP link del dev $DUMMY type dummy" - ] - }, - { - "id": "5930", - "name": "Show DSMARK class", - "category": [ - "qdisc", - "dsmark" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$IP link add dev $DUMMY type dummy || /bin/true" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", - "expExitCode": "0", - "verifyCmd": "$TC class show dev $DUMMY", - "matchPattern": "class dsmark 1:", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP link del dev $DUMMY type dummy" - ] - } -] diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 0899019a7..e14bdd445 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0d85be235..a81165930 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -470,7 +470,7 @@ static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t m if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 312889edb..c65c55b7a 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,6 +15,7 @@ #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -70,10 +71,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh index 41580751d..231622b3a 100755 --- a/tools/testing/selftests/vm/va_128TBswitch.sh +++ b/tools/testing/selftests/vm/va_128TBswitch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index 70a02301f..3d2d2eb9d 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 22e28b76f..6912e9577 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index fe34452fc..b9658f213 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -129,8 +129,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, if (params->output_divisor) duration = duration / params->output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; total_duration = duration * count; @@ -472,7 +471,11 @@ static void osnoise_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 76479bfb2..6c07f360d 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -282,7 +282,11 @@ void osnoise_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a8..ed08295bf 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -151,8 +151,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (params->output_divisor) latency = latency / params->output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; if (!thread) { hist = data->hist[cpu].irq; @@ -475,7 +474,11 @@ static void timerlat_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 334271935..8fc0f6aa1 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -305,7 +305,11 @@ static void timerlat_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f7..8c8d63c71 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -243,12 +243,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; @@ -484,13 +478,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 5571afd3b..92da41aaf 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -7,6 +7,8 @@ */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ |