From dc50eab76b709d68175a358d6e23a5a3890764d3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:39:57 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 6 + Documentation/ABI/testing/configfs-tsm | 82 + Documentation/ABI/testing/configfs-usb-gadget-uac2 | 2 + .../ABI/testing/debugfs-driver-habanalabs | 82 +- Documentation/ABI/testing/debugfs-driver-qat | 24 +- Documentation/ABI/testing/sysfs-bus-cdx | 66 +- Documentation/ABI/testing/sysfs-bus-cxl | 30 + Documentation/ABI/testing/sysfs-bus-i3c | 4 +- Documentation/ABI/testing/sysfs-bus-iio | 75 + .../ABI/testing/sysfs-bus-iio-adc-mcp3564 | 53 + .../ABI/testing/sysfs-bus-iio-resolver-ad2s1210 | 27 + Documentation/ABI/testing/sysfs-bus-papr-pmem | 2 +- Documentation/ABI/testing/sysfs-bus-usb | 9 + Documentation/ABI/testing/sysfs-bus-vdpa | 4 +- .../ABI/testing/sysfs-class-firmware-attributes | 30 + .../testing/sysfs-class-led-driver-turris-omnia | 14 + Documentation/ABI/testing/sysfs-class-net-queues | 22 +- .../ABI/testing/sysfs-class-net-statistics | 48 +- .../ABI/testing/sysfs-class-usb_power_delivery | 7 + Documentation/ABI/testing/sysfs-devices-system-cpu | 3 - Documentation/ABI/testing/sysfs-driver-habanalabs | 64 +- .../ABI/testing/sysfs-driver-intel-m10-bmc | 4 +- Documentation/ABI/testing/sysfs-driver-qat | 46 + Documentation/ABI/testing/sysfs-driver-qat_ras | 41 + Documentation/ABI/testing/sysfs-driver-qat_rl | 226 +++ .../ABI/testing/sysfs-firmware-dmi-entries | 2 +- Documentation/ABI/testing/sysfs-kernel-mm-damon | 7 + Documentation/ABI/testing/sysfs-tty | 15 +- Documentation/PCI/pci-error-recovery.rst | 4 +- .../Expedited-Grace-Periods.rst | 2 +- .../Memory-Ordering/TreeRCU-callback-registry.svg | 9 - .../RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg | 4 +- .../RCU/Design/Memory-Ordering/TreeRCU-gp.svg | 13 +- .../RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg | 4 +- 
.../RCU/Design/Requirements/Requirements.rst | 4 +- Documentation/RCU/listRCU.rst | 9 + Documentation/RCU/whatisRCU.rst | 4 +- Documentation/accel/qaic/qaic.rst | 10 + Documentation/admin-guide/cgroup-v1/memory.rst | 1 + Documentation/admin-guide/cgroup-v2.rst | 178 ++- Documentation/admin-guide/dynamic-debug-howto.rst | 2 +- Documentation/admin-guide/efi-stub.rst | 2 +- Documentation/admin-guide/hw-vuln/mds.rst | 34 +- .../hw-vuln/processor_mmio_stale_data.rst | 13 +- .../admin-guide/hw-vuln/tsx_async_abort.rst | 33 +- Documentation/admin-guide/kdump/kdump.rst | 37 +- Documentation/admin-guide/kdump/vmcoreinfo.rst | 30 - Documentation/admin-guide/kernel-parameters.txt | 118 +- .../admin-guide/laptops/thinkpad-acpi.rst | 20 + Documentation/admin-guide/media/mgb4.rst | 374 +++++ Documentation/admin-guide/media/pci-cardlist.rst | 1 + Documentation/admin-guide/media/v4l-drivers.rst | 1 + Documentation/admin-guide/media/visl.rst | 6 +- Documentation/admin-guide/mm/damon/usage.rst | 124 +- Documentation/admin-guide/mm/ksm.rst | 11 + Documentation/admin-guide/mm/memory-hotplug.rst | 12 +- Documentation/admin-guide/mm/pagemap.rst | 89 ++ Documentation/admin-guide/mm/userfaultfd.rst | 35 + Documentation/admin-guide/module-signing.rst | 17 +- Documentation/admin-guide/perf/ampere_cspmu.rst | 29 + Documentation/admin-guide/perf/index.rst | 1 + Documentation/admin-guide/pm/intel_idle.rst | 17 +- Documentation/admin-guide/pstore-blk.rst | 8 +- Documentation/admin-guide/spkguide.txt | 11 +- Documentation/admin-guide/sysctl/fs.rst | 16 +- Documentation/admin-guide/sysctl/kernel.rst | 26 +- Documentation/admin-guide/sysctl/net.rst | 1 + Documentation/admin-guide/sysctl/vm.rst | 4 +- Documentation/arch/arm64/cpu-feature-registers.rst | 2 + Documentation/arch/arm64/elf_hwcaps.rst | 15 +- Documentation/arch/arm64/silicon-errata.rst | 7 + Documentation/arch/ia64/aliasing.rst | 246 --- Documentation/arch/ia64/efirtc.rst | 144 -- Documentation/arch/ia64/err_inject.rst | 1067 
------------- Documentation/arch/ia64/features.rst | 3 - Documentation/arch/ia64/fsys.rst | 303 ---- Documentation/arch/ia64/ia64.rst | 49 - Documentation/arch/ia64/index.rst | 19 - Documentation/arch/ia64/irq-redir.rst | 80 - Documentation/arch/ia64/mca.rst | 198 --- Documentation/arch/ia64/serial.rst | 165 -- Documentation/arch/index.rst | 5 +- Documentation/arch/loongarch/introduction.rst | 4 +- Documentation/arch/powerpc/associativity.rst | 105 ++ Documentation/arch/powerpc/booting.rst | 110 ++ Documentation/arch/powerpc/bootwrapper.rst | 131 ++ Documentation/arch/powerpc/cpu_families.rst | 237 +++ Documentation/arch/powerpc/cpu_features.rst | 60 + Documentation/arch/powerpc/cxl.rst | 469 ++++++ Documentation/arch/powerpc/cxlflash.rst | 433 ++++++ Documentation/arch/powerpc/dawr-power9.rst | 101 ++ Documentation/arch/powerpc/dexcr.rst | 58 + Documentation/arch/powerpc/dscr.rst | 87 ++ .../arch/powerpc/eeh-pci-error-recovery.rst | 336 ++++ Documentation/arch/powerpc/elf_hwcaps.rst | 231 +++ Documentation/arch/powerpc/elfnote.rst | 41 + Documentation/arch/powerpc/features.rst | 3 + .../arch/powerpc/firmware-assisted-dump.rst | 381 +++++ Documentation/arch/powerpc/hvcs.rst | 581 +++++++ Documentation/arch/powerpc/imc.rst | 199 +++ Documentation/arch/powerpc/index.rst | 49 + Documentation/arch/powerpc/isa-versions.rst | 101 ++ Documentation/arch/powerpc/kasan.txt | 58 + Documentation/arch/powerpc/kaslr-booke32.rst | 42 + Documentation/arch/powerpc/kvm-nested.rst | 634 ++++++++ Documentation/arch/powerpc/mpc52xx.rst | 43 + Documentation/arch/powerpc/papr_hcalls.rst | 302 ++++ .../arch/powerpc/pci_iov_resource_on_powernv.rst | 312 ++++ Documentation/arch/powerpc/pmu-ebb.rst | 138 ++ Documentation/arch/powerpc/ptrace.rst | 157 ++ Documentation/arch/powerpc/qe_firmware.rst | 296 ++++ Documentation/arch/powerpc/syscall64-abi.rst | 153 ++ .../arch/powerpc/transactional_memory.rst | 274 ++++ Documentation/arch/powerpc/ultravisor.rst | 1117 ++++++++++++++ 
Documentation/arch/powerpc/vas-api.rst | 305 ++++ Documentation/arch/powerpc/vcpudispatch_stats.rst | 75 + Documentation/arch/powerpc/vmemmap_dedup.rst | 101 ++ Documentation/arch/riscv/acpi.rst | 10 + Documentation/arch/riscv/boot-image-header.rst | 59 + Documentation/arch/riscv/boot.rst | 169 ++ Documentation/arch/riscv/features.rst | 3 + Documentation/arch/riscv/hwprobe.rst | 104 ++ Documentation/arch/riscv/index.rst | 24 + Documentation/arch/riscv/patch-acceptance.rst | 59 + Documentation/arch/riscv/uabi.rst | 68 + Documentation/arch/riscv/vector.rst | 140 ++ Documentation/arch/riscv/vm-layout.rst | 157 ++ Documentation/arch/sh/index.rst | 6 - Documentation/arch/x86/amd-memory-encryption.rst | 2 +- Documentation/arch/x86/amd_hsmp.rst | 18 + Documentation/arch/x86/boot.rst | 2 +- Documentation/arch/x86/iommu.rst | 2 +- Documentation/arch/x86/resctrl.rst | 38 +- Documentation/arch/x86/topology.rst | 12 +- Documentation/block/blk-mq.rst | 2 +- Documentation/block/ioprio.rst | 3 - Documentation/bpf/kfuncs.rst | 6 +- Documentation/bpf/libbpf/program_types.rst | 10 + Documentation/bpf/prog_flow_dissector.rst | 2 +- .../bpf/standardization/instruction-set.rst | 8 + Documentation/conf.py | 6 + Documentation/core-api/cpu_hotplug.rst | 6 - Documentation/core-api/debugging-via-ohci1394.rst | 6 +- Documentation/core-api/maple_tree.rst | 2 +- Documentation/crypto/devel-algos.rst | 4 +- Documentation/dev-tools/kasan.rst | 7 +- Documentation/dev-tools/kcsan.rst | 4 +- Documentation/dev-tools/kmsan.rst | 6 +- Documentation/dev-tools/kselftest.rst | 6 +- Documentation/dev-tools/kunit/usage.rst | 10 +- Documentation/dev-tools/ubsan.rst | 6 +- Documentation/devicetree/bindings/Makefile | 2 +- .../devicetree/bindings/arm/amd,pensando.yaml | 26 + Documentation/devicetree/bindings/arm/amlogic.yaml | 3 + .../devicetree/bindings/arm/arm,coresight-cti.yaml | 34 +- .../devicetree/bindings/arm/arm,integrator.yaml | 39 - .../devicetree/bindings/arm/arm,realview.yaml | 37 - 
.../devicetree/bindings/arm/arm,versatile.yaml | 40 +- .../devicetree/bindings/arm/aspeed/aspeed.yaml | 1 + .../devicetree/bindings/arm/atmel-at91.yaml | 7 + Documentation/devicetree/bindings/arm/cpus.yaml | 5 +- Documentation/devicetree/bindings/arm/fsl.yaml | 43 +- .../devicetree/bindings/arm/intel-ixp4xx.yaml | 16 + .../devicetree/bindings/arm/mediatek.yaml | 16 + .../bindings/arm/mediatek/mediatek,mt7622-wed.yaml | 1 + Documentation/devicetree/bindings/arm/psci.yaml | 1 + Documentation/devicetree/bindings/arm/qcom.yaml | 45 + .../devicetree/bindings/arm/rockchip.yaml | 25 + Documentation/devicetree/bindings/arm/sti.yaml | 23 +- .../devicetree/bindings/arm/stm32/stm32.yaml | 1 + Documentation/devicetree/bindings/arm/sunxi.yaml | 16 + .../bindings/arm/tegra/nvidia,tegra20-pmc.yaml | 393 ----- .../devicetree/bindings/ata/nvidia,tegra-ahci.yaml | 2 +- .../devicetree/bindings/cache/qcom,llcc.yaml | 10 + .../clock/amlogic,s4-peripherals-clkc.yaml | 96 ++ .../bindings/clock/amlogic,s4-pll-clkc.yaml | 49 + .../devicetree/bindings/clock/qcom,hfpll.txt | 3 + .../devicetree/bindings/clock/qcom,rpmhcc.yaml | 1 + .../devicetree/bindings/clock/qcom,sm4450-gcc.yaml | 55 + .../bindings/clock/qcom,sm8450-camcc.yaml | 8 +- .../bindings/clock/renesas,rzg2l-cpg.yaml | 1 + .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 4 + .../bindings/cpufreq/qcom-cpufreq-nvmem.yaml | 8 +- .../devicetree/bindings/crypto/fsl-imx-sahara.yaml | 43 +- .../bindings/crypto/qcom,inline-crypto-engine.yaml | 1 + .../devicetree/bindings/crypto/qcom,prng.yaml | 28 +- .../bindings/devfreq/event/rockchip,dfi.yaml | 74 + .../bindings/devfreq/event/rockchip-dfi.txt | 18 - .../bindings/display/bridge/adi,adv7533.yaml | 6 + .../bindings/display/bridge/analogix,anx7814.yaml | 1 + .../display/bridge/fsl,imx93-mipi-dsi.yaml | 115 ++ .../devicetree/bindings/display/fsl,lcdif.yaml | 20 +- .../bindings/display/ilitek,ili9486.yaml | 4 - .../bindings/display/lvds-data-mapping.yaml | 84 + 
.../devicetree/bindings/display/lvds.yaml | 77 +- .../bindings/display/mediatek/mediatek,dp.yaml | 2 + .../bindings/display/mediatek/mediatek,dsi.yaml | 2 +- .../bindings/display/msm/dp-controller.yaml | 1 + .../devicetree/bindings/display/msm/gmu.yaml | 47 +- .../devicetree/bindings/display/msm/gpu.yaml | 4 +- .../bindings/display/msm/qcom,msm8998-mdss.yaml | 6 + .../bindings/display/msm/qcom,qcm2290-mdss.yaml | 6 + .../bindings/display/msm/qcom,sc7180-mdss.yaml | 8 + .../bindings/display/msm/qcom,sc7280-mdss.yaml | 10 + .../bindings/display/msm/qcom,sc8280xp-mdss.yaml | 4 + .../bindings/display/msm/qcom,sdm845-mdss.yaml | 8 + .../bindings/display/msm/qcom,sm6115-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm6125-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm6350-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm6375-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm8150-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm8250-mdss.yaml | 6 + .../bindings/display/msm/qcom,sm8350-mdss.yaml | 8 + .../bindings/display/msm/qcom,sm8450-mdss.yaml | 8 + .../bindings/display/msm/qcom,sm8550-mdss.yaml | 8 + .../bindings/display/panel/ilitek,ili9163.yaml | 4 - .../bindings/display/panel/jdi,lpm102a188a.yaml | 94 ++ .../display/panel/leadtek,ltk050h3146w.yaml | 1 + .../bindings/display/panel/newvision,nv3051d.yaml | 5 +- .../bindings/display/panel/panel-simple-dsi.yaml | 2 + .../panel/panel-simple-lvds-dual-ports.yaml | 118 ++ .../bindings/display/panel/panel-simple.yaml | 40 +- .../bindings/display/panel/raydium,rm692e5.yaml | 73 + .../display/panel/rocktech,jh057n00900.yaml | 2 + .../bindings/display/renesas,shmobile-lcdc.yaml | 130 ++ .../display/rockchip/rockchip,dw-mipi-dsi.yaml | 2 + .../bindings/display/rockchip/rockchip-vop.yaml | 1 + .../bindings/display/sitronix,st7735r.yaml | 5 - .../bindings/display/solomon,ssd-common.yaml | 42 + .../bindings/display/solomon,ssd1307fb.yaml | 28 +- .../bindings/display/solomon,ssd132x.yaml | 89 ++ 
.../devicetree/bindings/dma/qcom,gpi.yaml | 2 + Documentation/devicetree/bindings/eeprom/at24.yaml | 5 + .../devicetree/bindings/firmware/arm,scmi.yaml | 15 +- .../devicetree/bindings/firmware/qcom,scm.yaml | 10 + .../devicetree/bindings/gpio/fsl-imx-gpio.yaml | 8 + .../devicetree/bindings/gpio/gpio-vf610.yaml | 40 +- .../devicetree/bindings/gpio/intel,ixp4xx-gpio.txt | 38 - .../bindings/gpio/intel,ixp4xx-gpio.yaml | 73 + .../devicetree/bindings/gpio/loongson,ls-gpio.yaml | 21 +- .../devicetree/bindings/hwmon/adi,ltc2991.yaml | 128 ++ .../devicetree/bindings/hwmon/adi,max31827.yaml | 66 + .../devicetree/bindings/hwmon/ina3221.txt | 54 - .../devicetree/bindings/hwmon/npcm750-pwm-fan.txt | 6 +- .../bindings/hwmon/pmbus/infineon,tda38640.yaml | 49 + .../devicetree/bindings/hwmon/ti,ina2xx.yaml | 1 + .../devicetree/bindings/hwmon/ti,ina3221.yaml | 121 ++ .../devicetree/bindings/i2c/i2c-demux-pinctrl.txt | 135 -- .../devicetree/bindings/i2c/i2c-demux-pinctrl.yaml | 172 +++ .../devicetree/bindings/i2c/qcom,i2c-cci.yaml | 2 + Documentation/devicetree/bindings/i3c/i3c.yaml | 10 +- .../bindings/iio/accel/kionix,kx022a.yaml | 14 +- .../devicetree/bindings/iio/adc/lltc,ltc2497.yaml | 20 +- .../bindings/iio/adc/microchip,mcp3564.yaml | 205 +++ .../bindings/iio/adc/microchip,mcp3911.yaml | 6 + .../devicetree/bindings/iio/adc/ti,ads1015.yaml | 3 + .../bindings/iio/adc/ti,twl6030-gpadc.yaml | 43 + .../bindings/iio/amplifiers/adi,hmc425a.yaml | 12 +- .../bindings/iio/imu/invensense,mpu6050.yaml | 5 + .../devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 3 + .../bindings/iio/pressure/rohm,bm1390.yaml | 52 + .../bindings/iio/resolver/adi,ad2s1210.yaml | 177 +++ .../devicetree/bindings/input/fsl,scu-key.yaml | 2 + .../bindings/input/qcom,pm8921-keypad.yaml | 89 ++ .../bindings/input/qcom,pm8xxx-keypad.txt | 90 -- .../devicetree/bindings/input/syna,rmi4.yaml | 2 + .../input/touchscreen/cypress,tt21000.yaml | 3 + .../bindings/input/twl4030-pwrbutton.txt | 2 +- 
.../bindings/interconnect/qcom,msm8939.yaml | 74 + .../bindings/interconnect/qcom,msm8996.yaml | 126 ++ .../bindings/interconnect/qcom,qcm2290.yaml | 60 +- .../bindings/interconnect/qcom,rpm-common.yaml | 28 + .../devicetree/bindings/interconnect/qcom,rpm.yaml | 250 +-- .../bindings/interconnect/qcom,rpmh.yaml | 1 + .../bindings/interconnect/qcom,sdm660.yaml | 108 ++ .../bindings/interconnect/qcom,sdx75-rpmh.yaml | 92 ++ .../bindings/interrupt-controller/qcom,pdc.yaml | 1 + .../interrupt-controller/sifive,plic-1.0.0.yaml | 2 + .../thead,c900-aclint-mswi.yaml | 43 + .../devicetree/bindings/iommu/arm,smmu.yaml | 2 + .../devicetree/bindings/leds/backlight/common.yaml | 17 + .../bindings/leds/backlight/led-backlight.yaml | 19 +- .../bindings/leds/backlight/max8925-backlight.txt | 10 - .../bindings/leds/backlight/mps,mp3309c.yaml | 73 + .../bindings/leds/backlight/pwm-backlight.yaml | 20 +- Documentation/devicetree/bindings/leds/common.yaml | 4 +- .../devicetree/bindings/leds/irled/pwm-ir-tx.yaml | 5 +- .../devicetree/bindings/leds/kinetic,ktd202x.yaml | 171 ++ .../devicetree/bindings/leds/register-bit-led.yaml | 2 +- .../devicetree/bindings/mailbox/fsl,mu.yaml | 5 +- .../bindings/mailbox/qcom,apcs-kpss-global.yaml | 2 + .../devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + .../bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml | 6 + .../bindings/media/amlogic,meson6-ir.yaml | 1 + .../devicetree/bindings/media/cdns,csi2rx.yaml | 1 + .../devicetree/bindings/media/i2c/hynix,hi846.yaml | 7 +- .../bindings/media/i2c/onnn,mt9m114.yaml | 114 ++ .../bindings/media/i2c/ovti,ov02a10.yaml | 8 +- .../devicetree/bindings/media/i2c/ovti,ov4689.yaml | 6 +- .../devicetree/bindings/media/i2c/ovti,ov5640.yaml | 7 +- .../devicetree/bindings/media/i2c/ovti,ov5642.yaml | 141 ++ .../devicetree/bindings/media/i2c/ovti,ov5693.yaml | 2 +- .../devicetree/bindings/media/i2c/sony,imx214.yaml | 2 +- .../devicetree/bindings/media/i2c/sony,imx415.yaml | 10 +- .../bindings/media/i2c/ti,ds90ub960.yaml | 1 + 
.../devicetree/bindings/media/nokia,n900-ir | 20 - .../bindings/media/nuvoton,npcm-ece.yaml | 43 + .../bindings/media/nuvoton,npcm-vcd.yaml | 72 + .../bindings/media/qcom,sdm845-venus-v2.yaml | 8 + .../devicetree/bindings/media/rockchip-vpu.yaml | 7 + .../bindings/media/samsung,exynos4212-fimc-is.yaml | 15 +- .../devicetree/bindings/media/samsung,fimc.yaml | 27 +- .../bindings/media/ti,j721e-csi2rx-shim.yaml | 100 ++ .../bindings/media/video-interfaces.yaml | 1 + .../bindings/memory-controllers/ingenic,nemc.yaml | 1 + .../memory-controllers/renesas,rpc-if.yaml | 2 + .../memory-controllers/rockchip,rk3399-dmc.yaml | 2 +- .../bindings/memory-controllers/ti,gpmc.yaml | 2 +- .../memory-controllers/xlnx,versal-ddrmc-edac.yaml | 57 + .../bindings/mfd/arm,dev-platforms-syscon.yaml | 67 + .../bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml | 18 +- .../bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml | 2 +- .../bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml | 2 +- Documentation/devicetree/bindings/mfd/max8925.txt | 64 - Documentation/devicetree/bindings/mfd/max8998.txt | 125 -- .../devicetree/bindings/mfd/maxim,max5970.yaml | 5 + .../devicetree/bindings/mfd/maxim,max8925.yaml | 145 ++ .../devicetree/bindings/mfd/maxim,max8998.yaml | 324 ++++ .../devicetree/bindings/mfd/mediatek,mt6357.yaml | 2 + .../devicetree/bindings/mfd/qcom,spmi-pmic.yaml | 11 +- .../devicetree/bindings/mfd/qcom,tcsr.yaml | 1 + .../devicetree/bindings/mfd/qcom-pm8xxx.yaml | 26 +- .../devicetree/bindings/mfd/rockchip,rk805.yaml | 4 + .../devicetree/bindings/mfd/rockchip,rk806.yaml | 2 + .../devicetree/bindings/mfd/rockchip,rk808.yaml | 4 + .../devicetree/bindings/mfd/rockchip,rk809.yaml | 6 +- .../devicetree/bindings/mfd/rockchip,rk817.yaml | 3 + .../devicetree/bindings/mfd/rockchip,rk818.yaml | 4 + .../bindings/mfd/stericsson,db8500-prcmu.yaml | 2 +- Documentation/devicetree/bindings/mfd/syscon.yaml | 4 + .../devicetree/bindings/mfd/ti,lp87524-q1.yaml | 1 + .../devicetree/bindings/mfd/ti,lp87561-q1.yaml | 1 + 
.../devicetree/bindings/mfd/ti,lp87565-q1.yaml | 1 + Documentation/devicetree/bindings/mfd/ti,twl.yaml | 67 + .../devicetree/bindings/mfd/twl-family.txt | 46 - .../devicetree/bindings/mfd/x-powers,axp152.yaml | 5 +- .../devicetree/bindings/mmc/npcm,sdhci.yaml | 45 + .../devicetree/bindings/mmc/renesas,sdhi.yaml | 2 + .../devicetree/bindings/mmc/sdhci-msm.yaml | 9 +- .../bindings/mmc/starfive,jh7110-mmc.yaml | 2 - Documentation/devicetree/bindings/mtd/mtd.yaml | 7 +- .../bindings/mtd/partitions/fixed-partitions.yaml | 19 + .../bindings/mtd/partitions/nvmem-cells.yaml | 1 + .../bindings/net/allwinner,sun8i-a83t-emac.yaml | 2 + .../devicetree/bindings/net/brcm,asp-v2.0.yaml | 2 +- .../devicetree/bindings/net/dsa/brcm,sf2.yaml | 1 + Documentation/devicetree/bindings/net/dsa/dsa.yaml | 11 +- .../bindings/net/dsa/mediatek,mt7530.yaml | 10 +- .../devicetree/bindings/net/dsa/microchip,ksz.yaml | 22 + .../bindings/net/dsa/microchip,lan937x.yaml | 3 +- .../devicetree/bindings/net/dsa/nxp,sja1105.yaml | 4 +- .../devicetree/bindings/net/dsa/qca8k.yaml | 1 + .../devicetree/bindings/net/dsa/realtek.yaml | 2 + .../bindings/net/dsa/renesas,rzn1-a5psw.yaml | 10 +- .../devicetree/bindings/net/engleder,tsnep.yaml | 1 + .../bindings/net/ethernet-controller.yaml | 4 +- .../devicetree/bindings/net/ethernet-switch.yaml | 14 +- Documentation/devicetree/bindings/net/fsl,fec.yaml | 1 + .../bindings/net/loongson,ls1b-gmac.yaml | 114 ++ .../bindings/net/loongson,ls1c-emac.yaml | 113 ++ .../devicetree/bindings/net/microchip,lan95xx.yaml | 2 + .../bindings/net/mscc,vsc7514-switch.yaml | 46 +- .../devicetree/bindings/net/nxp,tja11xx.yaml | 1 + .../devicetree/bindings/net/renesas,ether.yaml | 3 +- .../devicetree/bindings/net/renesas,etheravb.yaml | 3 +- .../devicetree/bindings/net/ti,cc1352p7.yaml | 51 + .../devicetree/bindings/net/ti,cpsw-switch.yaml | 2 +- .../devicetree/bindings/net/ti,icssg-prueth.yaml | 8 + .../bindings/nvmem/allwinner,sun4i-a10-sid.yaml | 5 +- 
.../bindings/nvmem/amlogic,meson-gxbb-efuse.yaml | 1 + .../bindings/nvmem/amlogic,meson6-efuse.yaml | 1 + .../devicetree/bindings/nvmem/apple,efuses.yaml | 1 + .../devicetree/bindings/nvmem/imx-ocotp.yaml | 1 + .../devicetree/bindings/nvmem/mediatek,efuse.yaml | 1 + .../bindings/nvmem/microchip,sama7g5-otpc.yaml | 1 + .../devicetree/bindings/nvmem/mxs-ocotp.yaml | 1 + .../bindings/nvmem/nvmem-deprecated-cells.yaml | 28 + Documentation/devicetree/bindings/nvmem/nvmem.yaml | 9 - .../devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + .../devicetree/bindings/nvmem/qcom,sec-qfprom.yaml | 1 + .../devicetree/bindings/nvmem/qcom,spmi-sdam.yaml | 1 + .../devicetree/bindings/nvmem/rockchip,otp.yaml | 1 + .../devicetree/bindings/nvmem/rockchip-efuse.yaml | 1 + .../bindings/nvmem/socionext,uniphier-efuse.yaml | 1 + .../bindings/nvmem/sunplus,sp7021-ocotp.yaml | 1 + .../devicetree/bindings/nvmem/u-boot,env.yaml | 2 + .../devicetree/bindings/opp/opp-v2-kryo-cpu.yaml | 36 +- .../devicetree/bindings/pci/rcar-gen4-pci-ep.yaml | 115 ++ .../bindings/pci/rcar-gen4-pci-host.yaml | 127 ++ .../bindings/pci/snps,dw-pcie-common.yaml | 4 +- .../devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 4 +- .../devicetree/bindings/pci/snps,dw-pcie.yaml | 4 +- .../devicetree/bindings/pci/xlnx,nwl-pcie.yaml | 2 +- .../devicetree/bindings/pci/xlnx,xdma-host.yaml | 114 ++ .../devicetree/bindings/perf/riscv,pmu.yaml | 2 +- .../bindings/phy/marvell,pxa1928-usb-phy.yaml | 47 + .../devicetree/bindings/phy/mediatek,dsi-phy.yaml | 1 + .../bindings/phy/mediatek,mt7628-usbphy.yaml | 74 + .../devicetree/bindings/phy/phy-stih407-usb.txt | 24 - .../devicetree/bindings/phy/pxa1928-usb-phy.txt | 18 - .../bindings/phy/qcom,ipq5332-usb-hsphy.yaml | 4 +- .../bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml | 287 ---- .../bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml | 2 + .../phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml | 57 +- .../bindings/phy/qcom,snps-eusb2-phy.yaml | 7 +- .../devicetree/bindings/phy/ralink-usb-phy.txt | 23 - 
.../bindings/phy/st,stih407-usb2-phy.yaml | 63 + .../bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml | 1 + .../bindings/pinctrl/brcm,bcm6318-pinctrl.yaml | 5 + .../bindings/pinctrl/brcm,bcm63268-pinctrl.yaml | 25 +- .../bindings/pinctrl/brcm,bcm6328-pinctrl.yaml | 5 + .../bindings/pinctrl/brcm,bcm6358-pinctrl.yaml | 5 +- .../bindings/pinctrl/brcm,bcm6362-pinctrl.yaml | 7 +- .../bindings/pinctrl/brcm,bcm6368-pinctrl.yaml | 7 +- .../bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml | 217 +++ .../bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml | 2 +- .../bindings/pinctrl/qcom,mdm9607-tlmm.yaml | 3 +- .../bindings/pinctrl/qcom,msm8226-pinctrl.yaml | 4 +- .../bindings/pinctrl/qcom,sa8775p-tlmm.yaml | 1 + .../bindings/pinctrl/qcom,sc7280-pinctrl.yaml | 4 + .../bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml | 188 +++ .../bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml | 187 +++ .../bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml | 186 +++ .../bindings/pinctrl/renesas,rzg2l-pinctrl.yaml | 21 + .../bindings/pinctrl/renesas,rzv2m-pinctrl.yaml | 1 + .../bindings/pinctrl/rockchip,pinctrl.yaml | 2 + .../bindings/pinctrl/st,stm32-pinctrl.yaml | 3 +- .../bindings/power/amlogic,meson-sec-pwrc.yaml | 3 +- .../bindings/power/mediatek,power-controller.yaml | 6 + .../devicetree/bindings/power/power-domain.yaml | 17 +- .../devicetree/bindings/power/qcom,rpmpd.yaml | 82 +- .../bindings/power/reset/gpio-poweroff.yaml | 6 + .../bindings/power/reset/syscon-poweroff.yaml | 10 +- .../bindings/power/supply/max8925_battery.txt | 18 - .../bindings/power/supply/maxim,max17040.yaml | 31 + .../bindings/power/supply/mitsumi,mm8013.yaml | 38 + .../bindings/power/supply/qcom,pm8916-bms-vm.yaml | 83 + .../bindings/power/supply/qcom,pm8916-lbc.yaml | 128 ++ .../bindings/power/supply/sbs,sbs-manager.yaml | 6 + .../power/supply/stericsson,ab8500-battery.txt | 34 - Documentation/devicetree/bindings/pwm/imx-pwm.yaml | 10 +- Documentation/devicetree/bindings/pwm/mxs-pwm.yaml | 13 +- 
.../devicetree/bindings/pwm/pwm-rockchip.yaml | 1 + .../bindings/regulator/adi,max77503-regulator.yaml | 50 + .../devicetree/bindings/regulator/da9210.txt | 29 - .../devicetree/bindings/regulator/dlg,da9210.yaml | 52 + .../bindings/regulator/fixed-regulator.yaml | 5 + .../bindings/regulator/maxim,max20086.yaml | 1 + .../bindings/regulator/maxim,max77826.yaml | 2 + .../regulator/mediatek,mt6358-regulator.yaml | 250 +++ .../devicetree/bindings/regulator/mps,mpq2286.yaml | 59 + .../bindings/regulator/mt6358-regulator.txt | 350 ----- .../bindings/regulator/onnn,fan53880.yaml | 2 + .../bindings/regulator/qcom,rpmh-regulator.yaml | 12 + .../bindings/regulator/qcom,spmi-regulator.yaml | 68 +- .../devicetree/bindings/remoteproc/mtk,scp.yaml | 176 ++- .../devicetree/bindings/remoteproc/qcom,adsp.yaml | 20 +- .../bindings/remoteproc/qcom,msm8996-mss-pil.yaml | 2 - .../bindings/remoteproc/qcom,sc7180-pas.yaml | 81 +- .../bindings/remoteproc/qcom,sm6375-pas.yaml | 145 ++ .../bindings/remoteproc/renesas,rcar-rproc.yaml | 2 +- .../bindings/remoteproc/ti,pru-rproc.yaml | 23 + .../bindings/reserved-memory/framebuffer.yaml | 52 - .../bindings/reserved-memory/memory-region.yaml | 40 - .../bindings/reserved-memory/qcom,rmtfs-mem.yaml | 11 + .../bindings/reserved-memory/reserved-memory.txt | 2 +- .../bindings/reserved-memory/reserved-memory.yaml | 181 --- .../bindings/reserved-memory/shared-dma-pool.yaml | 97 -- Documentation/devicetree/bindings/riscv/cpus.yaml | 1 + .../devicetree/bindings/riscv/extensions.yaml | 12 + .../devicetree/bindings/riscv/sophgo.yaml | 32 + .../devicetree/bindings/rng/amlogic,meson-rng.yaml | 1 + .../devicetree/bindings/rng/st,stm32-rng.yaml | 20 +- .../bindings/rtc/amlogic,meson6-rtc.yaml | 1 + .../devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml | 42 + .../devicetree/bindings/rtc/epson,rtc7301.txt | 16 - .../devicetree/bindings/rtc/epson,rtc7301.yaml | 51 + .../devicetree/bindings/rtc/maxim,mcp795.txt | 11 - .../bindings/rtc/microcrystal,rv3032.yaml | 2 + 
.../devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml | 35 + .../devicetree/bindings/rtc/nxp,pcf2123.yaml | 47 + .../devicetree/bindings/rtc/nxp,pcf8523.txt | 18 - .../devicetree/bindings/rtc/nxp,pcf8523.yaml | 45 + .../devicetree/bindings/rtc/nxp,rtc-2123.txt | 17 - .../devicetree/bindings/rtc/trivial-rtc.yaml | 2 + .../devicetree/bindings/serial/esp,esp32-acm.yaml | 42 + .../devicetree/bindings/serial/esp,esp32-uart.yaml | 51 + .../devicetree/bindings/serial/fsl-imx-uart.yaml | 2 + .../devicetree/bindings/serial/fsl-mxs-auart.yaml | 13 +- .../devicetree/bindings/serial/maxim,max310x.txt | 48 - .../devicetree/bindings/serial/maxim,max310x.yaml | 74 + .../bindings/serial/nvidia,tegra20-hsuart.yaml | 10 +- .../devicetree/bindings/serial/nxp,sc16is7xx.txt | 118 -- .../devicetree/bindings/serial/nxp,sc16is7xx.yaml | 127 ++ .../devicetree/bindings/serial/qcom,msm-uart.yaml | 4 +- .../bindings/serial/qcom,msm-uartdm.yaml | 4 +- .../bindings/serial/renesas,em-uart.yaml | 14 +- .../devicetree/bindings/serial/renesas,hscif.yaml | 4 +- .../devicetree/bindings/serial/renesas,scif.yaml | 1 + .../devicetree/bindings/serial/renesas,scifa.yaml | 4 +- .../devicetree/bindings/serial/renesas,scifb.yaml | 4 +- .../devicetree/bindings/serial/samsung_uart.yaml | 8 +- .../devicetree/bindings/serial/serial.yaml | 18 +- .../bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml | 37 +- .../soc/mediatek/mediatek,mt7986-wo-ccif.yaml | 1 + .../devicetree/bindings/soc/mediatek/mtk-svs.yaml | 1 + .../bindings/soc/nuvoton/nuvoton,gfxi.yaml | 39 + .../devicetree/bindings/soc/qcom/qcom,geni-se.yaml | 2 + .../bindings/soc/qcom/qcom,pmic-glink.yaml | 19 + .../bindings/soc/renesas/renesas,rzg2l-sysc.yaml | 1 + .../devicetree/bindings/soc/renesas/renesas.yaml | 28 +- .../devicetree/bindings/soc/rockchip/grf.yaml | 1 + .../devicetree/bindings/soc/sti/st,sti-syscon.yaml | 46 + .../bindings/soc/tegra/nvidia,tegra20-pmc.yaml | 416 +++++ .../bindings/sound/audio-graph-port.yaml | 20 +- 
.../devicetree/bindings/sound/audio-graph.yaml | 9 +- .../devicetree/bindings/sound/awinic,aw87390.yaml | 58 + .../devicetree/bindings/sound/awinic,aw88395.yaml | 33 +- .../devicetree/bindings/sound/cirrus,cs42l43.yaml | 4 +- .../devicetree/bindings/sound/dai-params.yaml | 11 +- .../devicetree/bindings/sound/dialog,da7219.yaml | 1 + .../devicetree/bindings/sound/fsl,qmc-audio.yaml | 1 + .../bindings/sound/mediatek,mt8188-afe.yaml | 2 +- .../bindings/sound/mediatek,mt8188-mt6359.yaml | 1 + .../sound/mt8186-mt6366-rt1019-rt5682s.yaml | 1 + .../devicetree/bindings/sound/nxp,tfa9879.yaml | 44 + .../devicetree/bindings/sound/pcm512x.txt | 9 +- .../bindings/sound/qcom,lpass-tx-macro.yaml | 18 + .../devicetree/bindings/sound/qcom,sm8250.yaml | 1 + .../devicetree/bindings/sound/realtek,rt5616.yaml | 49 + .../devicetree/bindings/sound/richtek,rtq9128.yaml | 61 + Documentation/devicetree/bindings/sound/rt5616.txt | 32 - .../bindings/sound/starfive,jh7110-pwmdac.yaml | 76 + .../devicetree/bindings/sound/tas5805m.yaml | 5 +- .../devicetree/bindings/sound/tfa9879.txt | 23 - .../devicetree/bindings/sound/ti,pcm3168a.yaml | 1 + Documentation/devicetree/bindings/sound/wm8782.txt | 7 + .../bindings/soundwire/qcom,soundwire.yaml | 16 +- .../bindings/spi/allwinner,sun4i-a10-spi.yaml | 2 + .../bindings/spi/allwinner,sun6i-a31-spi.yaml | 2 + .../bindings/spi/arm,pl022-peripheral-props.yaml | 61 + .../bindings/spi/nvidia,tegra210-quad.yaml | 1 + .../devicetree/bindings/spi/qcom,spi-qup.yaml | 13 + .../devicetree/bindings/spi/renesas,rzv2m-csi.yaml | 9 + .../devicetree/bindings/spi/rockchip-sfc.yaml | 2 + .../devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 2 + .../bindings/spi/spi-peripheral-props.yaml | 6 + .../devicetree/bindings/spi/spi-pl022.yaml | 51 - .../devicetree/bindings/spi/st,stm32-spi.yaml | 20 - .../bindings/thermal/fsl,scu-thermal.yaml | 4 +- .../devicetree/bindings/thermal/imx-thermal.yaml | 7 + .../bindings/thermal/mediatek,lvts-thermal.yaml | 1 + 
.../bindings/thermal/nvidia,tegra124-soctherm.yaml | 5 + .../devicetree/bindings/thermal/qcom-tsens.yaml | 1 + .../devicetree/bindings/thermal/thermal-zones.yaml | 2 +- .../bindings/timer/cirrus,ep9301-timer.yaml | 49 + .../devicetree/bindings/timer/fsl,imxgpt.yaml | 31 +- .../devicetree/bindings/timer/renesas,rz-mtu3.yaml | 29 +- .../devicetree/bindings/timer/sifive,clint.yaml | 1 + .../bindings/timer/thead,c900-aclint-mtimer.yaml | 50 + .../devicetree/bindings/trivial-devices.yaml | 6 +- .../devicetree/bindings/ufs/qcom,ufs.yaml | 2 + .../devicetree/bindings/ufs/ufs-common.yaml | 35 +- .../devicetree/bindings/usb/ci-hdrc-usb2.yaml | 7 + .../devicetree/bindings/usb/fcs,fsa4480.yaml | 43 +- .../devicetree/bindings/usb/genesys,gl850g.yaml | 3 +- .../devicetree/bindings/usb/gpio-sbu-mux.yaml | 2 +- .../devicetree/bindings/usb/nxp,ptn36502.yaml | 94 ++ .../devicetree/bindings/usb/qcom,dwc3.yaml | 11 +- .../devicetree/bindings/usb/realtek,rtd-dwc3.yaml | 80 + .../bindings/usb/realtek,rtd-type-c.yaml | 82 + .../devicetree/bindings/usb/rockchip,dwc3.yaml | 62 +- .../devicetree/bindings/usb/snps,dwc3.yaml | 56 + .../devicetree/bindings/usb/ti,tps6598x.yaml | 86 +- Documentation/devicetree/bindings/usb/usb-hcd.yaml | 2 +- .../devicetree/bindings/usb/vialab,vl817.yaml | 1 - .../devicetree/bindings/vendor-prefixes.yaml | 28 + .../bindings/watchdog/amlogic,meson-gxbb-wdt.yaml | 12 +- .../devicetree/bindings/watchdog/aspeed-wdt.txt | 18 +- .../bindings/watchdog/atmel,at91rm9200-wdt.yaml | 33 + .../bindings/watchdog/atmel-at91rm9200-wdt.txt | 9 - .../bindings/watchdog/cnxt,cx92755-wdt.yaml | 45 + .../devicetree/bindings/watchdog/da9062-wdt.txt | 34 - .../devicetree/bindings/watchdog/digicolor-wdt.txt | 25 - .../bindings/watchdog/dlg,da9062-watchdog.yaml | 50 + .../devicetree/bindings/watchdog/fsl,scu-wdt.yaml | 4 +- .../bindings/watchdog/fsl-imx7ulp-wdt.yaml | 5 + .../devicetree/bindings/watchdog/qcom-wdt.yaml | 2 + .../devicetree/bindings/writing-schema.rst | 5 +- 
Documentation/doc-guide/contributing.rst | 4 + Documentation/driver-api/80211/mac80211.rst | 2 +- Documentation/driver-api/dma-buf.rst | 32 +- Documentation/driver-api/dpll.rst | 551 +++++++ Documentation/driver-api/driver-model/devres.rst | 14 +- Documentation/driver-api/gpio/consumer.rst | 4 + Documentation/driver-api/i3c/protocol.rst | 4 +- Documentation/driver-api/index.rst | 1 + Documentation/driver-api/media/camera-sensor.rst | 192 +-- Documentation/driver-api/media/drivers/ccs/ccs.rst | 10 +- Documentation/driver-api/media/v4l2-core.rst | 1 - Documentation/driver-api/media/v4l2-dev.rst | 8 - Documentation/driver-api/media/v4l2-videobuf.rst | 403 ----- Documentation/driver-api/pps.rst | 16 +- Documentation/driver-api/pwm.rst | 6 +- Documentation/driver-api/thermal/intel_dptf.rst | 64 + Documentation/driver-api/tty/index.rst | 1 + Documentation/driver-api/tty/tty_ioctl.rst | 10 + Documentation/driver-api/usb/dma.rst | 48 +- .../features/core/cBPF-JIT/arch-support.txt | 1 - .../features/core/eBPF-JIT/arch-support.txt | 1 - .../core/generic-idle-thread/arch-support.txt | 1 - .../features/core/jump-labels/arch-support.txt | 1 - .../core/thread-info-in-task/arch-support.txt | 1 - .../features/core/tracehook/arch-support.txt | 1 - .../features/debug/KASAN/arch-support.txt | 1 - .../debug/debug-vm-pgtable/arch-support.txt | 1 - .../debug/gcov-profile-all/arch-support.txt | 1 - Documentation/features/debug/kcov/arch-support.txt | 1 - Documentation/features/debug/kgdb/arch-support.txt | 1 - .../features/debug/kmemleak/arch-support.txt | 1 - .../debug/kprobes-on-ftrace/arch-support.txt | 1 - .../features/debug/kprobes/arch-support.txt | 1 - .../features/debug/kretprobes/arch-support.txt | 1 - .../features/debug/optprobes/arch-support.txt | 1 - .../features/debug/stackprotector/arch-support.txt | 1 - .../features/debug/uprobes/arch-support.txt | 1 - .../debug/user-ret-profiler/arch-support.txt | 1 - .../features/io/dma-contiguous/arch-support.txt | 1 - 
.../locking/cmpxchg-local/arch-support.txt | 1 - .../features/locking/lockdep/arch-support.txt | 1 - .../locking/queued-rwlocks/arch-support.txt | 1 - .../locking/queued-spinlocks/arch-support.txt | 1 - .../features/perf/kprobes-event/arch-support.txt | 1 - .../features/perf/perf-regs/arch-support.txt | 1 - .../features/perf/perf-stackdump/arch-support.txt | 1 - .../sched/membarrier-sync-core/arch-support.txt | 1 - .../features/sched/numa-balancing/arch-support.txt | 1 - .../seccomp/seccomp-filter/arch-support.txt | 1 - .../time/arch-tick-broadcast/arch-support.txt | 1 - .../features/time/clockevents/arch-support.txt | 1 - .../time/context-tracking/arch-support.txt | 1 - .../features/time/irq-time-acct/arch-support.txt | 1 - .../features/time/virt-cpuacct/arch-support.txt | 1 - .../features/vm/ELF-ASLR/arch-support.txt | 1 - .../features/vm/PG_uncached/arch-support.txt | 1 - Documentation/features/vm/THP/arch-support.txt | 1 - Documentation/features/vm/TLB/arch-support.txt | 1 - .../features/vm/huge-vmap/arch-support.txt | 1 - .../features/vm/ioremap_prot/arch-support.txt | 1 - .../features/vm/pte_special/arch-support.txt | 1 - Documentation/filesystems/erofs.rst | 6 +- Documentation/filesystems/files.rst | 53 +- Documentation/filesystems/fscrypt.rst | 121 +- Documentation/filesystems/fuse-io.rst | 3 +- Documentation/filesystems/nfs/exporting.rst | 14 +- Documentation/filesystems/overlayfs.rst | 52 +- Documentation/filesystems/porting.rst | 16 + Documentation/filesystems/proc.rst | 8 +- .../filesystems/xfs-online-fsck-design.rst | 2 +- Documentation/firmware-guide/acpi/enumeration.rst | 43 + Documentation/gpu/amdgpu/driver-misc.rst | 18 + Documentation/gpu/amdgpu/thermal.rst | 30 + Documentation/gpu/automated_testing.rst | 20 +- Documentation/gpu/drivers.rst | 1 + Documentation/gpu/drm-kms.rst | 2 + Documentation/gpu/drm-mm.rst | 20 +- Documentation/gpu/drm-uapi.rst | 92 +- Documentation/gpu/drm-usage-stats.rst | 1 + Documentation/gpu/drm-vm-bind-async.rst | 309 
++++ Documentation/gpu/i915.rst | 29 +- Documentation/gpu/implementation_guidelines.rst | 9 + Documentation/gpu/index.rst | 1 + Documentation/gpu/panfrost.rst | 40 + Documentation/gpu/rfc/xe.rst | 93 +- Documentation/hwmon/adt7475.rst | 3 +- Documentation/hwmon/aquacomputer_d5next.rst | 7 + Documentation/hwmon/asus_ec_sensors.rst | 1 + Documentation/hwmon/index.rst | 2 + Documentation/hwmon/ltc2991.rst | 43 + Documentation/hwmon/max31827.rst | 4 +- Documentation/hwmon/nct6683.rst | 1 + Documentation/hwmon/powerz.rst | 30 + Documentation/hwmon/sch5627.rst | 10 + Documentation/i2c/fault-codes.rst | 4 + Documentation/i2c/i2c-address-translators.rst | 2 +- Documentation/kbuild/kbuild.rst | 6 + Documentation/kbuild/makefiles.rst | 13 +- .../maintainer/maintainer-entry-profile.rst | 2 +- Documentation/memory-barriers.txt | 7 + Documentation/misc-devices/eeprom.rst | 107 -- Documentation/misc-devices/index.rst | 1 - Documentation/mm/damon/design.rst | 26 +- Documentation/mm/overcommit-accounting.rst | 3 +- Documentation/mm/page_tables.rst | 127 ++ Documentation/mm/vmemmap_dedup.rst | 2 +- Documentation/netlink/genetlink-c.yaml | 45 +- Documentation/netlink/genetlink-legacy.yaml | 51 +- Documentation/netlink/genetlink.yaml | 39 +- Documentation/netlink/netlink-raw.yaml | 23 +- Documentation/netlink/specs/devlink.yaml | 1628 ++++++++++++++++++-- Documentation/netlink/specs/dpll.yaml | 506 ++++++ Documentation/netlink/specs/ethtool.yaml | 3 - Documentation/netlink/specs/handshake.yaml | 8 +- Documentation/netlink/specs/mptcp.yaml | 393 +++++ Documentation/netlink/specs/netdev.yaml | 21 +- Documentation/netlink/specs/nfsd.yaml | 89 ++ .../networking/device_drivers/appletalk/cops.rst | 80 - .../networking/device_drivers/appletalk/index.rst | 18 - .../networking/device_drivers/ethernet/index.rst | 1 + .../device_drivers/ethernet/intel/idpf.rst | 160 ++ .../ethernet/mellanox/mlx5/kconfig.rst | 2 +- .../device_drivers/ethernet/neterion/s2io.rst | 4 +- 
Documentation/networking/device_drivers/index.rst | 2 - .../networking/device_drivers/qlogic/index.rst | 18 - .../networking/device_drivers/qlogic/qlge.rst | 118 -- Documentation/networking/devlink/devlink-port.rst | 2 +- Documentation/networking/devlink/i40e.rst | 59 + Documentation/networking/devlink/index.rst | 29 + Documentation/networking/dsa/b53.rst | 14 +- Documentation/networking/dsa/bcm_sf2.rst | 2 +- Documentation/networking/dsa/configuration.rst | 102 +- Documentation/networking/dsa/dsa.rst | 162 +- Documentation/networking/dsa/lan9303.rst | 2 +- Documentation/networking/dsa/sja1105.rst | 6 +- Documentation/networking/filter.rst | 4 +- Documentation/networking/index.rst | 2 +- Documentation/networking/ip-sysctl.rst | 41 +- Documentation/networking/ipddp.rst | 78 - Documentation/networking/mptcp-sysctl.rst | 11 + Documentation/networking/msg_zerocopy.rst | 13 +- Documentation/networking/netconsole.rst | 22 +- Documentation/networking/page_pool.rst | 4 +- Documentation/networking/pktgen.rst | 12 + Documentation/networking/scaling.rst | 42 + Documentation/networking/sfp-phylink.rst | 10 +- Documentation/networking/smc-sysctl.rst | 6 +- Documentation/networking/tcp_ao.rst | 444 ++++++ Documentation/networking/xdp-rx-metadata.rst | 7 + Documentation/powerpc/associativity.rst | 105 -- Documentation/powerpc/booting.rst | 110 -- Documentation/powerpc/bootwrapper.rst | 131 -- Documentation/powerpc/cpu_families.rst | 237 --- Documentation/powerpc/cpu_features.rst | 60 - Documentation/powerpc/cxl.rst | 469 ------ Documentation/powerpc/cxlflash.rst | 433 ------ Documentation/powerpc/dawr-power9.rst | 101 -- Documentation/powerpc/dexcr.rst | 58 - Documentation/powerpc/dscr.rst | 87 -- Documentation/powerpc/eeh-pci-error-recovery.rst | 336 ---- Documentation/powerpc/elf_hwcaps.rst | 231 --- Documentation/powerpc/elfnote.rst | 41 - Documentation/powerpc/features.rst | 3 - Documentation/powerpc/firmware-assisted-dump.rst | 381 ----- Documentation/powerpc/hvcs.rst | 581 
------- Documentation/powerpc/imc.rst | 199 --- Documentation/powerpc/index.rst | 48 - Documentation/powerpc/isa-versions.rst | 101 -- Documentation/powerpc/kasan.txt | 58 - Documentation/powerpc/kaslr-booke32.rst | 42 - Documentation/powerpc/mpc52xx.rst | 43 - Documentation/powerpc/papr_hcalls.rst | 302 ---- .../powerpc/pci_iov_resource_on_powernv.rst | 312 ---- Documentation/powerpc/pmu-ebb.rst | 138 -- Documentation/powerpc/ptrace.rst | 157 -- Documentation/powerpc/qe_firmware.rst | 296 ---- Documentation/powerpc/syscall64-abi.rst | 153 -- Documentation/powerpc/transactional_memory.rst | 274 ---- Documentation/powerpc/ultravisor.rst | 1117 -------------- Documentation/powerpc/vas-api.rst | 305 ---- Documentation/powerpc/vcpudispatch_stats.rst | 75 - Documentation/powerpc/vmemmap_dedup.rst | 101 -- Documentation/process/7.AdvancedTopics.rst | 18 + Documentation/process/backporting.rst | 604 ++++++++ Documentation/process/changes.rst | 2 +- Documentation/process/index.rst | 3 +- Documentation/process/maintainer-netdev.rst | 35 +- Documentation/process/security-bugs.rst | 35 +- Documentation/process/stable-kernel-rules.rst | 13 + Documentation/process/submitting-patches.rst | 10 +- Documentation/riscv/acpi.rst | 10 - Documentation/riscv/boot-image-header.rst | 59 - Documentation/riscv/boot.rst | 169 -- Documentation/riscv/features.rst | 3 - Documentation/riscv/hwprobe.rst | 98 -- Documentation/riscv/index.rst | 24 - Documentation/riscv/patch-acceptance.rst | 59 - Documentation/riscv/uabi.rst | 48 - Documentation/riscv/vector.rst | 140 -- Documentation/riscv/vm-layout.rst | 157 -- Documentation/rust/index.rst | 19 + Documentation/scheduler/sched-arch.rst | 4 +- Documentation/scheduler/sched-capacity.rst | 13 +- Documentation/scheduler/sched-energy.rst | 29 +- Documentation/scheduler/sched-rt-group.rst | 40 +- Documentation/security/index.rst | 1 + Documentation/security/snp-tdx-threat-model.rst | 253 +++ Documentation/sound/soc/codec-to-codec.rst | 8 +- 
Documentation/sound/soc/dapm.rst | 2 +- Documentation/sound/soc/dpcm.rst | 3 +- Documentation/sphinx/cdomain.py | 4 +- Documentation/sphinx/kernel_abi.py | 2 +- Documentation/sphinx/kernel_feat.py | 4 +- Documentation/sphinx/kerneldoc.py | 4 +- Documentation/sphinx/kfigure.py | 2 +- Documentation/sphinx/maintainers_include.py | 8 +- Documentation/subsystem-apis.rst | 2 +- Documentation/trace/coresight/coresight.rst | 2 +- Documentation/trace/fprobetrace.rst | 8 +- Documentation/trace/kprobes.rst | 1 - Documentation/trace/kprobetrace.rst | 8 +- .../trace/postprocess/trace-vmscan-postprocess.pl | 42 +- Documentation/trace/user_events.rst | 21 +- .../translations/it_IT/riscv/patch-acceptance.rst | 2 +- .../sp_SP/process/embargoed-hardware-issues.rst | 341 ++++ Documentation/translations/sp_SP/process/index.rst | 2 + .../translations/sp_SP/process/security-bugs.rst | 103 ++ Documentation/translations/zh_CN/arch/index.rst | 3 +- .../zh_CN/arch/loongarch/introduction.rst | 4 +- .../zh_CN/arch/riscv/boot-image-header.rst | 69 + .../translations/zh_CN/arch/riscv/index.rst | 30 + .../zh_CN/arch/riscv/patch-acceptance.rst | 33 + .../translations/zh_CN/arch/riscv/vm-layout.rst | 104 ++ .../translations/zh_CN/core-api/cpu_hotplug.rst | 6 - Documentation/translations/zh_CN/index.rst | 5 +- .../zh_CN/maintainer/maintainer-entry-profile.rst | 2 +- .../translations/zh_CN/riscv/boot-image-header.rst | 69 - Documentation/translations/zh_CN/riscv/index.rst | 30 - .../translations/zh_CN/riscv/patch-acceptance.rst | 33 - .../translations/zh_CN/riscv/vm-layout.rst | 104 -- .../translations/zh_CN/scheduler/sched-arch.rst | 5 +- .../translations/zh_CN/subsystem-apis.rst | 110 ++ .../zh_CN/video4linux/v4l2-framework.txt | 12 - .../translations/zh_TW/admin-guide/README.rst | 164 +- .../translations/zh_TW/admin-guide/bootconfig.rst | 294 ++++ .../translations/zh_TW/admin-guide/bug-bisect.rst | 10 +- .../translations/zh_TW/admin-guide/bug-hunting.rst | 38 +- 
.../zh_TW/admin-guide/clearing-warn-once.rst | 4 +- .../translations/zh_TW/admin-guide/cpu-load.rst | 8 +- .../translations/zh_TW/admin-guide/cputopology.rst | 97 ++ .../translations/zh_TW/admin-guide/index.rst | 137 +- .../translations/zh_TW/admin-guide/init.rst | 36 +- .../zh_TW/admin-guide/lockup-watchdogs.rst | 67 + .../zh_TW/admin-guide/mm/damon/index.rst | 30 + .../zh_TW/admin-guide/mm/damon/lru_sort.rst | 264 ++++ .../zh_TW/admin-guide/mm/damon/reclaim.rst | 229 +++ .../zh_TW/admin-guide/mm/damon/start.rst | 125 ++ .../zh_TW/admin-guide/mm/damon/usage.rst | 592 +++++++ .../translations/zh_TW/admin-guide/mm/index.rst | 50 + .../translations/zh_TW/admin-guide/mm/ksm.rst | 199 +++ .../zh_TW/admin-guide/reporting-issues.rst | 727 ++++----- .../zh_TW/admin-guide/reporting-regressions.rst | 371 +++++ .../zh_TW/admin-guide/security-bugs.rst | 26 +- .../translations/zh_TW/admin-guide/sysrq.rst | 281 ++++ .../zh_TW/admin-guide/tainted-kernels.rst | 84 +- .../translations/zh_TW/admin-guide/unicode.rst | 10 +- Documentation/translations/zh_TW/arch/arm/Booting | 176 +++ .../zh_TW/arch/arm/kernel_user_helpers.txt | 285 ++++ .../translations/zh_TW/arch/arm64/amu.rst | 6 +- .../translations/zh_TW/arch/arm64/booting.txt | 28 +- .../translations/zh_TW/arch/arm64/elf_hwcaps.rst | 10 +- .../zh_TW/arch/arm64/legacy_instructions.txt | 14 +- .../translations/zh_TW/arch/arm64/memory.txt | 16 +- .../translations/zh_TW/arch/arm64/perf.rst | 2 +- .../zh_TW/arch/arm64/silicon-errata.txt | 28 +- .../zh_TW/arch/arm64/tagged-pointers.txt | 10 +- Documentation/translations/zh_TW/arch/index.rst | 29 + .../translations/zh_TW/arch/loongarch/booting.rst | 49 + .../translations/zh_TW/arch/loongarch/features.rst | 9 + .../translations/zh_TW/arch/loongarch/index.rst | 28 + .../zh_TW/arch/loongarch/introduction.rst | 354 +++++ .../zh_TW/arch/loongarch/irq-chip-model.rst | 158 ++ .../translations/zh_TW/arch/mips/booting.rst | 35 + .../translations/zh_TW/arch/mips/features.rst | 14 + 
.../translations/zh_TW/arch/mips/index.rst | 30 + .../translations/zh_TW/arch/mips/ingenic-tcu.rst | 73 + .../translations/zh_TW/arch/openrisc/index.rst | 33 + .../zh_TW/arch/openrisc/openrisc_port.rst | 128 ++ .../translations/zh_TW/arch/openrisc/todo.rst | 24 + .../translations/zh_TW/arch/parisc/debugging.rst | 46 + .../translations/zh_TW/arch/parisc/index.rst | 32 + .../translations/zh_TW/arch/parisc/registers.rst | 157 ++ Documentation/translations/zh_TW/cpu-freq/core.rst | 38 +- .../translations/zh_TW/cpu-freq/cpu-drivers.rst | 158 +- .../translations/zh_TW/cpu-freq/cpufreq-stats.rst | 52 +- .../translations/zh_TW/cpu-freq/index.rst | 13 +- .../translations/zh_TW/dev-tools/gcov.rst | 265 ++++ .../zh_TW/dev-tools/gdb-kernel-debugging.rst | 168 ++ .../translations/zh_TW/dev-tools/index.rst | 15 +- .../translations/zh_TW/dev-tools/kasan.rst | 463 ++++++ .../translations/zh_TW/dev-tools/sparse.rst | 91 ++ .../translations/zh_TW/dev-tools/sparse.txt | 91 -- .../zh_TW/dev-tools/testing-overview.rst | 162 ++ .../translations/zh_TW/filesystems/debugfs.rst | 47 +- .../translations/zh_TW/filesystems/index.rst | 2 +- .../translations/zh_TW/filesystems/sysfs.txt | 14 +- .../translations/zh_TW/filesystems/tmpfs.rst | 35 +- .../translations/zh_TW/filesystems/virtiofs.rst | 9 +- Documentation/translations/zh_TW/index.rst | 5 +- .../translations/zh_TW/process/1.Intro.rst | 78 +- .../translations/zh_TW/process/2.Process.rst | 130 +- .../translations/zh_TW/process/3.Early-stage.rst | 44 +- .../translations/zh_TW/process/4.Coding.rst | 104 +- .../translations/zh_TW/process/5.Posting.rst | 80 +- .../translations/zh_TW/process/6.Followthrough.rst | 46 +- .../zh_TW/process/7.AdvancedTopics.rst | 56 +- .../translations/zh_TW/process/8.Conclusion.rst | 14 +- .../process/code-of-conduct-interpretation.rst | 52 +- .../translations/zh_TW/process/code-of-conduct.rst | 18 +- .../translations/zh_TW/process/coding-style.rst | 405 +++-- .../zh_TW/process/development-process.rst | 2 +- 
.../translations/zh_TW/process/email-clients.rst | 279 ++-- .../zh_TW/process/embargoed-hardware-issues.rst | 76 +- Documentation/translations/zh_TW/process/index.rst | 5 +- .../zh_TW/process/kernel-driver-statement.rst | 2 +- .../translations/zh_TW/process/license-rules.rst | 54 +- .../zh_TW/process/management-style.rst | 60 +- .../zh_TW/process/stable-api-nonsense.rst | 86 +- .../zh_TW/process/stable-kernel-rules.rst | 36 +- .../zh_TW/process/submit-checklist.rst | 92 +- .../zh_TW/process/submitting-patches.rst | 749 +++++---- .../zh_TW/process/volatile-considered-harmful.rst | 32 +- Documentation/usb/gadget-testing.rst | 2 + Documentation/usb/gadget_uvc.rst | 2 +- .../userspace-api/dma-buf-alloc-exchange.rst | 389 +++++ Documentation/userspace-api/index.rst | 1 + Documentation/userspace-api/landlock.rst | 99 +- .../userspace-api/media/drivers/camera-sensor.rst | 104 ++ .../userspace-api/media/drivers/index.rst | 2 + .../userspace-api/media/drivers/npcm-video.rst | 66 + Documentation/userspace-api/media/gen-errors.rst | 4 +- Documentation/userspace-api/media/v4l/buffer.rst | 4 +- Documentation/userspace-api/media/v4l/control.rst | 4 + .../userspace-api/media/v4l/dev-subdev.rst | 49 +- .../userspace-api/media/v4l/dv-timings.rst | 21 + .../userspace-api/media/v4l/pixfmt-reserved.rst | 7 + .../userspace-api/media/v4l/pixfmt-srggb12p.rst | 4 +- .../userspace-api/media/v4l/subdev-formats.rst | 72 + .../userspace-api/netlink/genetlink-legacy.rst | 16 +- Documentation/userspace-api/netlink/specs.rst | 23 +- Documentation/virt/kvm/api.rst | 158 +- Documentation/virt/kvm/arm/index.rst | 1 + Documentation/virt/kvm/arm/vcpu-features.rst | 48 + Documentation/virt/kvm/devices/arm-vgic-v3.rst | 7 + Documentation/virt/kvm/x86/mmu.rst | 43 +- 954 files changed, 37483 insertions(+), 18396 deletions(-) create mode 100644 Documentation/ABI/testing/configfs-tsm create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564 create mode 100644 
Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210 create mode 100644 Documentation/ABI/testing/sysfs-driver-qat_ras create mode 100644 Documentation/ABI/testing/sysfs-driver-qat_rl create mode 100644 Documentation/admin-guide/media/mgb4.rst create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst delete mode 100644 Documentation/arch/ia64/aliasing.rst delete mode 100644 Documentation/arch/ia64/efirtc.rst delete mode 100644 Documentation/arch/ia64/err_inject.rst delete mode 100644 Documentation/arch/ia64/features.rst delete mode 100644 Documentation/arch/ia64/fsys.rst delete mode 100644 Documentation/arch/ia64/ia64.rst delete mode 100644 Documentation/arch/ia64/index.rst delete mode 100644 Documentation/arch/ia64/irq-redir.rst delete mode 100644 Documentation/arch/ia64/mca.rst delete mode 100644 Documentation/arch/ia64/serial.rst create mode 100644 Documentation/arch/powerpc/associativity.rst create mode 100644 Documentation/arch/powerpc/booting.rst create mode 100644 Documentation/arch/powerpc/bootwrapper.rst create mode 100644 Documentation/arch/powerpc/cpu_families.rst create mode 100644 Documentation/arch/powerpc/cpu_features.rst create mode 100644 Documentation/arch/powerpc/cxl.rst create mode 100644 Documentation/arch/powerpc/cxlflash.rst create mode 100644 Documentation/arch/powerpc/dawr-power9.rst create mode 100644 Documentation/arch/powerpc/dexcr.rst create mode 100644 Documentation/arch/powerpc/dscr.rst create mode 100644 Documentation/arch/powerpc/eeh-pci-error-recovery.rst create mode 100644 Documentation/arch/powerpc/elf_hwcaps.rst create mode 100644 Documentation/arch/powerpc/elfnote.rst create mode 100644 Documentation/arch/powerpc/features.rst create mode 100644 Documentation/arch/powerpc/firmware-assisted-dump.rst create mode 100644 Documentation/arch/powerpc/hvcs.rst create mode 100644 Documentation/arch/powerpc/imc.rst create mode 100644 Documentation/arch/powerpc/index.rst create mode 100644 
Documentation/arch/powerpc/isa-versions.rst create mode 100644 Documentation/arch/powerpc/kasan.txt create mode 100644 Documentation/arch/powerpc/kaslr-booke32.rst create mode 100644 Documentation/arch/powerpc/kvm-nested.rst create mode 100644 Documentation/arch/powerpc/mpc52xx.rst create mode 100644 Documentation/arch/powerpc/papr_hcalls.rst create mode 100644 Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst create mode 100644 Documentation/arch/powerpc/pmu-ebb.rst create mode 100644 Documentation/arch/powerpc/ptrace.rst create mode 100644 Documentation/arch/powerpc/qe_firmware.rst create mode 100644 Documentation/arch/powerpc/syscall64-abi.rst create mode 100644 Documentation/arch/powerpc/transactional_memory.rst create mode 100644 Documentation/arch/powerpc/ultravisor.rst create mode 100644 Documentation/arch/powerpc/vas-api.rst create mode 100644 Documentation/arch/powerpc/vcpudispatch_stats.rst create mode 100644 Documentation/arch/powerpc/vmemmap_dedup.rst create mode 100644 Documentation/arch/riscv/acpi.rst create mode 100644 Documentation/arch/riscv/boot-image-header.rst create mode 100644 Documentation/arch/riscv/boot.rst create mode 100644 Documentation/arch/riscv/features.rst create mode 100644 Documentation/arch/riscv/hwprobe.rst create mode 100644 Documentation/arch/riscv/index.rst create mode 100644 Documentation/arch/riscv/patch-acceptance.rst create mode 100644 Documentation/arch/riscv/uabi.rst create mode 100644 Documentation/arch/riscv/vector.rst create mode 100644 Documentation/arch/riscv/vm-layout.rst create mode 100644 Documentation/devicetree/bindings/arm/amd,pensando.yaml delete mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml create mode 100644 Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml create mode 100644 Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml create mode 100644 Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml create mode 100644 
Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml delete mode 100644 Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt create mode 100644 Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml create mode 100644 Documentation/devicetree/bindings/display/lvds-data-mapping.yaml create mode 100644 Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml create mode 100644 Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml create mode 100644 Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml create mode 100644 Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml create mode 100644 Documentation/devicetree/bindings/display/solomon,ssd-common.yaml create mode 100644 Documentation/devicetree/bindings/display/solomon,ssd132x.yaml delete mode 100644 Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt create mode 100644 Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml create mode 100644 Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml delete mode 100644 Documentation/devicetree/bindings/hwmon/ina3221.txt create mode 100644 Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml create mode 100644 Documentation/devicetree/bindings/hwmon/ti,ina3221.yaml delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt create mode 100644 Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml create mode 100644 Documentation/devicetree/bindings/iio/adc/microchip,mcp3564.yaml create mode 100644 Documentation/devicetree/bindings/iio/adc/ti,twl6030-gpadc.yaml create mode 100644 Documentation/devicetree/bindings/iio/pressure/rohm,bm1390.yaml create mode 100644 Documentation/devicetree/bindings/iio/resolver/adi,ad2s1210.yaml create mode 100644 Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml delete mode 100644 Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt create mode 
100644 Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,msm8996.yaml create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,rpm-common.yaml create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml create mode 100644 Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml delete mode 100644 Documentation/devicetree/bindings/leds/backlight/max8925-backlight.txt create mode 100644 Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml create mode 100644 Documentation/devicetree/bindings/leds/kinetic,ktd202x.yaml create mode 100644 Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml create mode 100644 Documentation/devicetree/bindings/media/i2c/ovti,ov5642.yaml delete mode 100644 Documentation/devicetree/bindings/media/nokia,n900-ir create mode 100644 Documentation/devicetree/bindings/media/nuvoton,npcm-ece.yaml create mode 100644 Documentation/devicetree/bindings/media/nuvoton,npcm-vcd.yaml create mode 100644 Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml create mode 100644 Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml create mode 100644 Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml delete mode 100644 Documentation/devicetree/bindings/mfd/max8925.txt delete mode 100644 Documentation/devicetree/bindings/mfd/max8998.txt create mode 100644 Documentation/devicetree/bindings/mfd/maxim,max8925.yaml create mode 100644 Documentation/devicetree/bindings/mfd/maxim,max8998.yaml create mode 100644 Documentation/devicetree/bindings/mfd/ti,twl.yaml delete mode 100644 Documentation/devicetree/bindings/mfd/twl-family.txt create mode 100644 Documentation/devicetree/bindings/mmc/npcm,sdhci.yaml create mode 100644 
Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml create mode 100644 Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml create mode 100644 Documentation/devicetree/bindings/net/ti,cc1352p7.yaml create mode 100644 Documentation/devicetree/bindings/nvmem/nvmem-deprecated-cells.yaml create mode 100644 Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml create mode 100644 Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml create mode 100644 Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml create mode 100644 Documentation/devicetree/bindings/phy/marvell,pxa1928-usb-phy.yaml create mode 100644 Documentation/devicetree/bindings/phy/mediatek,mt7628-usbphy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/phy-stih407-usb.txt delete mode 100644 Documentation/devicetree/bindings/phy/pxa1928-usb-phy.txt delete mode 100644 Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml delete mode 100644 Documentation/devicetree/bindings/phy/ralink-usb-phy.txt create mode 100644 Documentation/devicetree/bindings/phy/st,stih407-usb2-phy.yaml create mode 100644 Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml create mode 100644 Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml create mode 100644 Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml create mode 100644 Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml delete mode 100644 Documentation/devicetree/bindings/power/supply/max8925_battery.txt create mode 100644 Documentation/devicetree/bindings/power/supply/mitsumi,mm8013.yaml create mode 100644 Documentation/devicetree/bindings/power/supply/qcom,pm8916-bms-vm.yaml create mode 100644 Documentation/devicetree/bindings/power/supply/qcom,pm8916-lbc.yaml delete mode 100644 Documentation/devicetree/bindings/power/supply/stericsson,ab8500-battery.txt create mode 100644 
Documentation/devicetree/bindings/regulator/adi,max77503-regulator.yaml delete mode 100644 Documentation/devicetree/bindings/regulator/da9210.txt create mode 100644 Documentation/devicetree/bindings/regulator/dlg,da9210.yaml create mode 100644 Documentation/devicetree/bindings/regulator/mediatek,mt6358-regulator.yaml create mode 100644 Documentation/devicetree/bindings/regulator/mps,mpq2286.yaml delete mode 100644 Documentation/devicetree/bindings/regulator/mt6358-regulator.txt create mode 100644 Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml delete mode 100644 Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml delete mode 100644 Documentation/devicetree/bindings/reserved-memory/memory-region.yaml delete mode 100644 Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml delete mode 100644 Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml create mode 100644 Documentation/devicetree/bindings/riscv/sophgo.yaml create mode 100644 Documentation/devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/epson,rtc7301.txt create mode 100644 Documentation/devicetree/bindings/rtc/epson,rtc7301.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/maxim,mcp795.txt create mode 100644 Documentation/devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml create mode 100644 Documentation/devicetree/bindings/rtc/nxp,pcf2123.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/nxp,pcf8523.txt create mode 100644 Documentation/devicetree/bindings/rtc/nxp,pcf8523.yaml delete mode 100644 Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt create mode 100644 Documentation/devicetree/bindings/serial/esp,esp32-acm.yaml create mode 100644 Documentation/devicetree/bindings/serial/esp,esp32-uart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/maxim,max310x.txt create mode 100644 Documentation/devicetree/bindings/serial/maxim,max310x.yaml delete 
mode 100644 Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt create mode 100644 Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml create mode 100644 Documentation/devicetree/bindings/soc/nuvoton/nuvoton,gfxi.yaml create mode 100644 Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml create mode 100644 Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml create mode 100644 Documentation/devicetree/bindings/sound/awinic,aw87390.yaml create mode 100644 Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml create mode 100644 Documentation/devicetree/bindings/sound/realtek,rt5616.yaml create mode 100644 Documentation/devicetree/bindings/sound/richtek,rtq9128.yaml delete mode 100644 Documentation/devicetree/bindings/sound/rt5616.txt create mode 100644 Documentation/devicetree/bindings/sound/starfive,jh7110-pwmdac.yaml delete mode 100644 Documentation/devicetree/bindings/sound/tfa9879.txt create mode 100644 Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml create mode 100644 Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml create mode 100644 Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml create mode 100644 Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml create mode 100644 Documentation/devicetree/bindings/usb/realtek,rtd-dwc3.yaml create mode 100644 Documentation/devicetree/bindings/usb/realtek,rtd-type-c.yaml create mode 100644 Documentation/devicetree/bindings/watchdog/atmel,at91rm9200-wdt.yaml delete mode 100644 Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/cnxt,cx92755-wdt.yaml delete mode 100644 Documentation/devicetree/bindings/watchdog/da9062-wdt.txt delete mode 100644 Documentation/devicetree/bindings/watchdog/digicolor-wdt.txt create mode 100644 Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml create mode 100644 Documentation/driver-api/dpll.rst 
delete mode 100644 Documentation/driver-api/media/v4l2-videobuf.rst create mode 100644 Documentation/driver-api/tty/tty_ioctl.rst create mode 100644 Documentation/gpu/drm-vm-bind-async.rst create mode 100644 Documentation/gpu/implementation_guidelines.rst create mode 100644 Documentation/gpu/panfrost.rst create mode 100644 Documentation/hwmon/ltc2991.rst create mode 100644 Documentation/hwmon/powerz.rst delete mode 100644 Documentation/misc-devices/eeprom.rst create mode 100644 Documentation/netlink/specs/dpll.yaml create mode 100644 Documentation/netlink/specs/mptcp.yaml create mode 100644 Documentation/netlink/specs/nfsd.yaml delete mode 100644 Documentation/networking/device_drivers/appletalk/cops.rst delete mode 100644 Documentation/networking/device_drivers/appletalk/index.rst create mode 100644 Documentation/networking/device_drivers/ethernet/intel/idpf.rst delete mode 100644 Documentation/networking/device_drivers/qlogic/index.rst delete mode 100644 Documentation/networking/device_drivers/qlogic/qlge.rst create mode 100644 Documentation/networking/devlink/i40e.rst delete mode 100644 Documentation/networking/ipddp.rst create mode 100644 Documentation/networking/tcp_ao.rst delete mode 100644 Documentation/powerpc/associativity.rst delete mode 100644 Documentation/powerpc/booting.rst delete mode 100644 Documentation/powerpc/bootwrapper.rst delete mode 100644 Documentation/powerpc/cpu_families.rst delete mode 100644 Documentation/powerpc/cpu_features.rst delete mode 100644 Documentation/powerpc/cxl.rst delete mode 100644 Documentation/powerpc/cxlflash.rst delete mode 100644 Documentation/powerpc/dawr-power9.rst delete mode 100644 Documentation/powerpc/dexcr.rst delete mode 100644 Documentation/powerpc/dscr.rst delete mode 100644 Documentation/powerpc/eeh-pci-error-recovery.rst delete mode 100644 Documentation/powerpc/elf_hwcaps.rst delete mode 100644 Documentation/powerpc/elfnote.rst delete mode 100644 Documentation/powerpc/features.rst delete mode 100644 
Documentation/powerpc/firmware-assisted-dump.rst delete mode 100644 Documentation/powerpc/hvcs.rst delete mode 100644 Documentation/powerpc/imc.rst delete mode 100644 Documentation/powerpc/index.rst delete mode 100644 Documentation/powerpc/isa-versions.rst delete mode 100644 Documentation/powerpc/kasan.txt delete mode 100644 Documentation/powerpc/kaslr-booke32.rst delete mode 100644 Documentation/powerpc/mpc52xx.rst delete mode 100644 Documentation/powerpc/papr_hcalls.rst delete mode 100644 Documentation/powerpc/pci_iov_resource_on_powernv.rst delete mode 100644 Documentation/powerpc/pmu-ebb.rst delete mode 100644 Documentation/powerpc/ptrace.rst delete mode 100644 Documentation/powerpc/qe_firmware.rst delete mode 100644 Documentation/powerpc/syscall64-abi.rst delete mode 100644 Documentation/powerpc/transactional_memory.rst delete mode 100644 Documentation/powerpc/ultravisor.rst delete mode 100644 Documentation/powerpc/vas-api.rst delete mode 100644 Documentation/powerpc/vcpudispatch_stats.rst delete mode 100644 Documentation/powerpc/vmemmap_dedup.rst create mode 100644 Documentation/process/backporting.rst delete mode 100644 Documentation/riscv/acpi.rst delete mode 100644 Documentation/riscv/boot-image-header.rst delete mode 100644 Documentation/riscv/boot.rst delete mode 100644 Documentation/riscv/features.rst delete mode 100644 Documentation/riscv/hwprobe.rst delete mode 100644 Documentation/riscv/index.rst delete mode 100644 Documentation/riscv/patch-acceptance.rst delete mode 100644 Documentation/riscv/uabi.rst delete mode 100644 Documentation/riscv/vector.rst delete mode 100644 Documentation/riscv/vm-layout.rst create mode 100644 Documentation/security/snp-tdx-threat-model.rst create mode 100644 Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst create mode 100644 Documentation/translations/sp_SP/process/security-bugs.rst create mode 100644 Documentation/translations/zh_CN/arch/riscv/boot-image-header.rst create mode 100644 
Documentation/translations/zh_CN/arch/riscv/index.rst create mode 100644 Documentation/translations/zh_CN/arch/riscv/patch-acceptance.rst create mode 100644 Documentation/translations/zh_CN/arch/riscv/vm-layout.rst delete mode 100644 Documentation/translations/zh_CN/riscv/boot-image-header.rst delete mode 100644 Documentation/translations/zh_CN/riscv/index.rst delete mode 100644 Documentation/translations/zh_CN/riscv/patch-acceptance.rst delete mode 100644 Documentation/translations/zh_CN/riscv/vm-layout.rst create mode 100644 Documentation/translations/zh_CN/subsystem-apis.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/bootconfig.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/cputopology.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/lockup-watchdogs.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/damon/index.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/damon/lru_sort.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/damon/reclaim.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/index.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/mm/ksm.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/reporting-regressions.rst create mode 100644 Documentation/translations/zh_TW/admin-guide/sysrq.rst create mode 100644 Documentation/translations/zh_TW/arch/arm/Booting create mode 100644 Documentation/translations/zh_TW/arch/arm/kernel_user_helpers.txt create mode 100644 Documentation/translations/zh_TW/arch/index.rst create mode 100644 Documentation/translations/zh_TW/arch/loongarch/booting.rst create mode 100644 Documentation/translations/zh_TW/arch/loongarch/features.rst create mode 100644 
Documentation/translations/zh_TW/arch/loongarch/index.rst create mode 100644 Documentation/translations/zh_TW/arch/loongarch/introduction.rst create mode 100644 Documentation/translations/zh_TW/arch/loongarch/irq-chip-model.rst create mode 100644 Documentation/translations/zh_TW/arch/mips/booting.rst create mode 100644 Documentation/translations/zh_TW/arch/mips/features.rst create mode 100644 Documentation/translations/zh_TW/arch/mips/index.rst create mode 100644 Documentation/translations/zh_TW/arch/mips/ingenic-tcu.rst create mode 100644 Documentation/translations/zh_TW/arch/openrisc/index.rst create mode 100644 Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst create mode 100644 Documentation/translations/zh_TW/arch/openrisc/todo.rst create mode 100644 Documentation/translations/zh_TW/arch/parisc/debugging.rst create mode 100644 Documentation/translations/zh_TW/arch/parisc/index.rst create mode 100644 Documentation/translations/zh_TW/arch/parisc/registers.rst create mode 100644 Documentation/translations/zh_TW/dev-tools/gcov.rst create mode 100644 Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst create mode 100644 Documentation/translations/zh_TW/dev-tools/kasan.rst create mode 100644 Documentation/translations/zh_TW/dev-tools/sparse.rst delete mode 100644 Documentation/translations/zh_TW/dev-tools/sparse.txt create mode 100644 Documentation/translations/zh_TW/dev-tools/testing-overview.rst create mode 100644 Documentation/userspace-api/dma-buf-alloc-exchange.rst create mode 100644 Documentation/userspace-api/media/drivers/camera-sensor.rst create mode 100644 Documentation/userspace-api/media/drivers/npcm-video.rst create mode 100644 Documentation/virt/kvm/arm/vcpu-features.rst (limited to 'Documentation') diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 825e619250..f2ec42949a 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ 
b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -270,6 +270,12 @@ Description: Shows the operation capability bits displayed in bitmap format correlates to the operations allowed. It's visible only on platforms that support the capability. +What: /sys/bus/dsa/devices/wq./driver_name +Date: Sept 8, 2023 +KernelVersion: 6.7.0 +Contact: dmaengine@vger.kernel.org +Description: Name of driver to be bound to the wq. + What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/Documentation/ABI/testing/configfs-tsm b/Documentation/ABI/testing/configfs-tsm new file mode 100644 index 0000000000..dd24202b5b --- /dev/null +++ b/Documentation/ABI/testing/configfs-tsm @@ -0,0 +1,82 @@ +What: /sys/kernel/config/tsm/report/$name/inblob +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Up to 64 bytes of user specified binary data. For replay + protection this should include a nonce, but the kernel does not + place any restrictions on the content. + +What: /sys/kernel/config/tsm/report/$name/outblob +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Binary attestation report generated from @inblob and other + options. The format of the report is implementation specific + where the implementation is conveyed via the @provider + attribute. + +What: /sys/kernel/config/tsm/report/$name/auxblob +Date: October, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Optional supplemental data that a TSM may emit, visibility + of this attribute depends on TSM, and may be empty if no + auxiliary data is available. + + When @provider is "sev_guest" this file contains the + "cert_table" from SEV-ES Guest-Hypervisor Communication Block + Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ. 
+ https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf + +What: /sys/kernel/config/tsm/report/$name/provider +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) A name for the format-specification of @outblob like + "sev_guest" [1] or "tdx_guest" [2] in the near term, or a + common standard format in the future. + + [1]: SEV Secure Nested Paging Firmware ABI Specification + Revision 1.55 Table 22 + https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf + + [2]: Intel® Trust Domain Extensions Data Center Attestation + Primitives : Quote Generation Library and Quote Verification + Library Revision 0.8 Appendix 4,5 + https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf + +What: /sys/kernel/config/tsm/report/$name/generation +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) The value in this attribute increments each time @inblob or + any option is written. Userspace can detect conflicts by + checking generation before writing to any attribute and making + sure the number of writes matches expectations after reading + @outblob, or it can prevent conflicts by creating a report + instance per requesting context. + +What: /sys/kernel/config/tsm/report/$name/privlevel +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports for TVMs running at + different privilege levels, like SEV-SNP "VMPL", specify the + privilege level via this attribute. The minimum acceptable + value is conveyed via @privlevel_floor and the maximum + acceptable value is TSM_PRIVLEVEL_MAX (3). 
+ +What: /sys/kernel/config/tsm/report/$name/privlevel_floor +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Indicates the minimum permissible value that can be written + to @privlevel. diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac2 b/Documentation/ABI/testing/configfs-usb-gadget-uac2 index 3371c39f65..a2bf4fd82a 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-uac2 +++ b/Documentation/ABI/testing/configfs-usb-gadget-uac2 @@ -35,4 +35,6 @@ Description: req_number the number of pre-allocated requests for both capture and playback function_name name of the interface + c_terminal_type code of the capture terminal type + p_terminal_type code of the playback terminal type ===================== ======================================= diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index df53578005..042fd125fb 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -1,4 +1,4 @@ -What: /sys/kernel/debug/habanalabs/hl/addr +What: /sys/kernel/debug/accel//addr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through only when the IOMMU is disabled. 
The acceptable value is a string that starts with "0x" -What: /sys/kernel/debug/habanalabs/hl/clk_gate +What: /sys/kernel/debug/accel//clk_gate Date: May 2020 KernelVersion: 5.8 Contact: ogabbay@kernel.org Description: This setting is now deprecated as clock gating is handled solely by the f/w -What: /sys/kernel/debug/habanalabs/hl/command_buffers +What: /sys/kernel/debug/accel//command_buffers Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with information about the currently allocated command buffers -What: /sys/kernel/debug/habanalabs/hl/command_submission +What: /sys/kernel/debug/accel//command_submission Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with information about the currently active command submissions -What: /sys/kernel/debug/habanalabs/hl/command_submission_jobs +What: /sys/kernel/debug/accel//command_submission_jobs Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with detailed information about each JOB (CB) of each active command submission -What: /sys/kernel/debug/habanalabs/hl/data32 +What: /sys/kernel/debug/accel//data32 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory -What: /sys/kernel/debug/habanalabs/hl/data64 +What: /sys/kernel/debug/accel//data64 Date: Jan 2020 KernelVersion: 5.6 Contact: ogabbay@kernel.org @@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory -What: /sys/kernel/debug/habanalabs/hl/data_dma +What: /sys/kernel/debug/accel//data_dma Date: Apr 2021 KernelVersion: 5.13 Contact: ogabbay@kernel.org @@ -79,11 +79,11 @@ 
Description: Allows the root user to read from the device's internal a very long time. This interface doesn't support concurrency in the same device. In GAUDI and GOYA, this action can cause undefined behavior - in case the it is done while the device is executing user + in case it is done while the device is executing user workloads. Only supported on GAUDI at this stage. -What: /sys/kernel/debug/habanalabs/hl/device +What: /sys/kernel/debug/accel//device Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state. Valid values are "disable", "enable", "suspend", "resume". User can read this property to see the valid values -What: /sys/kernel/debug/habanalabs/hl/device_release_watchdog_timeout +What: /sys/kernel/debug/accel//device_release_watchdog_timeout Date: Oct 2022 KernelVersion: 6.2 Contact: ttayar@habana.ai Description: The watchdog timeout value in seconds for a device release upon certain error cases, after which the device is reset. -What: /sys/kernel/debug/habanalabs/hl/dma_size +What: /sys/kernel/debug/accel//dma_size Date: Apr 2021 KernelVersion: 5.13 Contact: ogabbay@kernel.org @@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read When the write is finished, the user can read the "data_dma" blob -What: /sys/kernel/debug/habanalabs/hl/dump_razwi_events +What: /sys/kernel/debug/accel//dump_razwi_events Date: Aug 2022 KernelVersion: 5.20 Contact: fkassabri@habana.ai @@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist. the routine will clear the status register. Usage: cat dump_razwi_events -What: /sys/kernel/debug/habanalabs/hl/dump_security_violations +What: /sys/kernel/debug/accel//dump_security_violations Date: Jan 2021 KernelVersion: 5.12 Contact: ogabbay@kernel.org @@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. 
This will also ack all security violations meanings those violations will not be dumped next time user calls this API -What: /sys/kernel/debug/habanalabs/hl/engines +What: /sys/kernel/debug/accel//engines Date: Jul 2019 KernelVersion: 5.3 Contact: ogabbay@kernel.org Description: Displays the status registers values of the device engines and their derived idle status -What: /sys/kernel/debug/habanalabs/hl/i2c_addr +What: /sys/kernel/debug/accel//i2c_addr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated by the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/i2c_bus +What: /sys/kernel/debug/accel//i2c_bus Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/i2c_data +What: /sys/kernel/debug/accel//i2c_data Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's reading from the file generates a read transaction, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/i2c_len +What: /sys/kernel/debug/accel//i2c_len Date: Dec 2021 KernelVersion: 5.17 Contact: obitton@habana.ai @@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/i2c_reg +What: /sys/kernel/debug/accel//i2c_reg Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by the device's CPU, Not available when device is loaded with 
secured firmware -What: /sys/kernel/debug/habanalabs/hl/led0 +What: /sys/kernel/debug/accel//led0 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the first S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/led1 +What: /sys/kernel/debug/accel//led1 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the second S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/led2 +What: /sys/kernel/debug/accel//led2 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the third S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/habanalabs/hl/memory_scrub +What: /sys/kernel/debug/accel//memory_scrub Date: May 2022 KernelVersion: 5.19 Contact: dhirschfeld@habana.ai Description: Allows the root user to scrub the dram memory. The scrubbing value can be set using the debugfs file memory_scrub_val. -What: /sys/kernel/debug/habanalabs/hl/memory_scrub_val +What: /sys/kernel/debug/accel//memory_scrub_val Date: May 2022 KernelVersion: 5.19 Contact: dhirschfeld@habana.ai @@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user scrubs the dram using 'memory_scrub' debugfs file and the scrubbing value when using module param 'memory_scrub' -What: /sys/kernel/debug/habanalabs/hl/mmu +What: /sys/kernel/debug/accel//mmu Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -217,19 +217,19 @@ Description: Displays the hop values and physical address for a given ASID and virtual address. The user should write the ASID and VA into the file and then read the file to get the result. e.g. 
to display info about VA 0x1000 for ASID 1 you need to do: - echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu + echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu -What: /sys/kernel/debug/habanalabs/hl/mmu_error +What: /sys/kernel/debug/accel//mmu_error Date: Mar 2021 KernelVersion: 5.12 Contact: fkassabri@habana.ai Description: Check and display page fault or access violation mmu errors for all MMUs specified in mmu_cap_mask. e.g. to display error info for MMU hw cap bit 9, you need to do: - echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error - cat /sys/kernel/debug/habanalabs/hl0/mmu_error + echo "0x200" > /sys/kernel/debug/accel/0/mmu_error + cat /sys/kernel/debug/accel/0/mmu_error -What: /sys/kernel/debug/habanalabs/hl/monitor_dump +What: /sys/kernel/debug/accel//monitor_dump Date: Mar 2022 KernelVersion: 5.19 Contact: osharabi@habana.ai @@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's This interface doesn't support concurrency in the same device. Only supported on GAUDI. -What: /sys/kernel/debug/habanalabs/hl/monitor_dump_trig +What: /sys/kernel/debug/accel//monitor_dump_trig Date: Mar 2022 KernelVersion: 5.19 Contact: osharabi@habana.ai @@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio When the write is finished, the user can read the "monitor_dump" blob -What: /sys/kernel/debug/habanalabs/hl/set_power_state +What: /sys/kernel/debug/accel//set_power_state Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the PCI power state. Valid values are "1" for D0 and "2" for D3Hot -What: /sys/kernel/debug/habanalabs/hl/skip_reset_on_timeout +What: /sys/kernel/debug/accel//skip_reset_on_timeout Date: Jun 2021 KernelVersion: 5.13 Contact: ynudelman@habana.ai @@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. 
Value of "0" means device will be reset in case some CS has timed out, otherwise it will not be reset. -What: /sys/kernel/debug/habanalabs/hl/state_dump +What: /sys/kernel/debug/accel//state_dump Date: Oct 2021 KernelVersion: 5.15 Contact: ynudelman@habana.ai @@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure. Writing an integer X discards X state dumps, so that the next read would return X+1-st newest state dump. -What: /sys/kernel/debug/habanalabs/hl/stop_on_err +What: /sys/kernel/debug/accel//stop_on_err Date: Mar 2020 KernelVersion: 5.6 Contact: ogabbay@kernel.org @@ -287,21 +287,21 @@ Description: Sets the stop-on_error option for the device engines. Value of "0" is for disable, otherwise enable. Relevant only for GOYA and GAUDI. -What: /sys/kernel/debug/habanalabs/hl/timeout_locked +What: /sys/kernel/debug/accel//timeout_locked Date: Sep 2021 KernelVersion: 5.16 Contact: obitton@habana.ai Description: Sets the command submission timeout value in seconds. -What: /sys/kernel/debug/habanalabs/hl/userptr +What: /sys/kernel/debug/accel//userptr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org -Description: Displays a list with information about the currently user +Description: Displays a list with information about the current user pointers (user virtual addresses) that are pinned and mapped to DMA addresses -What: /sys/kernel/debug/habanalabs/hl/userptr_lookup +What: /sys/kernel/debug/accel//userptr_lookup Date: Oct 2021 KernelVersion: 5.15 Contact: ogabbay@kernel.org @@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual addresses) that are pinned and mapped to DMA addresses, and see their resolution to the specific dma address. 
-What: /sys/kernel/debug/habanalabs/hl/vm +What: /sys/kernel/debug/accel//vm Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat index 6731ffacc5..b2db010d85 100644 --- a/Documentation/ABI/testing/debugfs-driver-qat +++ b/Documentation/ABI/testing/debugfs-driver-qat @@ -1,4 +1,4 @@ -What: /sys/kernel/debug/qat__/qat/fw_counters +What: /sys/kernel/debug/qat__/fw_counters Date: November 2023 KernelVersion: 6.6 Contact: qat-linux@intel.com @@ -59,3 +59,25 @@ Description: (RO) Read returns the device health status. The driver does not monitor for Heartbeat. It is left for a user to poll the status periodically. + +What: /sys/kernel/debug/qat__/pm_status +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (RO) Read returns power management information specific to the + QAT device. + + This attribute is only available for qat_4xxx devices. + +What: /sys/kernel/debug/qat__/cnv_errors +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (RO) Read returns, for each Acceleration Engine (AE), the number + of errors and the type of the last error detected by the device + when performing verified compression. + Reported counters:: + + : Number of Compress and Verify (CnV) errors and type + of the last CnV error detected by Acceleration + Engine N. diff --git a/Documentation/ABI/testing/sysfs-bus-cdx b/Documentation/ABI/testing/sysfs-bus-cdx index 7af477f499..8c067ff99e 100644 --- a/Documentation/ABI/testing/sysfs-bus-cdx +++ b/Documentation/ABI/testing/sysfs-bus-cdx @@ -28,14 +28,57 @@ Description: of a device manufacturer. Combination of Vendor ID and Device ID identifies a device. +What: /sys/bus/cdx/devices/.../subsystem_vendor +Date: July 2023 +Contact: puneet.gupta@amd.com +Description: + Subsystem Vendor ID for this CDX device, in hexadecimal. 
+ Subsystem Vendor ID is 16 bit identifier specific to the + card manufacturer. + +What: /sys/bus/cdx/devices/.../subsystem_device +Date: July 2023 +Contact: puneet.gupta@amd.com +Description: + Subsystem Device ID for this CDX device, in hexadecimal. + Subsystem Device ID is 16 bit identifier specific to the + card manufacturer. + +What: /sys/bus/cdx/devices/.../class +Date: July 2023 +Contact: puneet.gupta@amd.com +Description: + This file contains the class of the CDX device, in hexadecimal. + Class is a 24 bit identifier that specifies the functionality of the device. + +What: /sys/bus/cdx/devices/.../revision +Date: July 2023 +Contact: puneet.gupta@amd.com +Description: + This file contains the revision field of the CDX device, in hexadecimal. + Revision is 8 bit revision identifier of the device. + +What: /sys/bus/cdx/devices/.../enable +Date: October 2023 +Contact: abhijit.gangurde@amd.com +Description: + CDX bus should be disabled before updating the devices in FPGA. + Writing n/0/off will attempt to disable the CDX bus and + writing y/1/on will attempt to enable the CDX bus. Reading this file + gives the current state of the bus, 1 for enabled and 0 for disabled. + + For example:: + + # echo 1 > /sys/bus/cdx/.../enable + What: /sys/bus/cdx/devices/.../reset Date: March 2023 Contact: nipun.gupta@amd.com Description: - Writing y/1/on to this file resets the CDX device. - On resetting the device, the corresponding driver is notified - twice, once before the device is being reset, and again after - the reset has been complete. + Writing y/1/on to this file resets the CDX device or all devices + on the bus. On resetting the device, the corresponding driver is + notified twice, once before the device is being reset, and again + after the reset has been complete. 
For example:: @@ -54,3 +97,18 @@ Description: For example:: # echo 1 > /sys/bus/cdx/devices/.../remove + +What: /sys/bus/cdx/devices/.../modalias +Date: July 2023 +Contact: nipun.gupta@amd.com +Description: + This attribute indicates the CDX ID of the device. + That is in the format: + cdx:vXXXXdXXXXsvXXXXsdXXXXcXXXXXX, + where: + + - vXXXX contains the vendor ID; + - dXXXX contains the device ID; + - svXXXX contains the subsystem vendor ID; + - sdXXXX contains the subsystem device ID; + - cXXXXXX contains the device class. diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 087f762ebf..e76c360060 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -178,6 +178,21 @@ Description: hardware decoder target list. +What: /sys/bus/cxl/devices/portX/decoders_committed +Date: October, 2023 +KernelVersion: v6.7 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) A memory device is considered active when any of its + decoders are in the "committed" state (See CXL 3.0 8.2.4.19.7 + CXL HDM Decoder n Control Register). Hotplug and destructive + operations like "sanitize" are blocked while device is actively + decoding a Host Physical Address range. Note that this number + may be elevated without any regionX objects active or even + enumerated, as this may be due to decoders established by + platform firmware or a previous kernel (kexec). + + What: /sys/bus/cxl/devices/decoderX.Y Date: June, 2021 KernelVersion: v5.14 @@ -369,6 +384,21 @@ Description: provided it is currently idle / not bound to a driver. +What: /sys/bus/cxl/devices/decoderX.Y/qos_class +Date: May, 2023 +KernelVersion: v6.5 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemetry" this + root-decoder-only attribute conveys a platform specific cookie + that identifies a QoS performance class for the CXL Window. 
+ This class-id can be compared against a similar "qos_class" + published for each memory-type that an endpoint supports. While + it is not required that endpoints map their local memory-class + to a matching platform class, mismatches are not recommended and + there are platform specific side-effects that may result. + + What: /sys/bus/cxl/devices/regionZ/uuid Date: May, 2022 KernelVersion: v6.0 diff --git a/Documentation/ABI/testing/sysfs-bus-i3c b/Documentation/ABI/testing/sysfs-bus-i3c index 1f4a266233..e5248fd67a 100644 --- a/Documentation/ABI/testing/sysfs-bus-i3c +++ b/Documentation/ABI/testing/sysfs-bus-i3c @@ -67,7 +67,7 @@ What: /sys/bus/i3c/devices/i3c-/pid KernelVersion: 5.0 Contact: linux-i3c@vger.kernel.org Description: - PID stands for Provisional ID and is used to uniquely identify + PID stands for Provisioned ID and is used to uniquely identify a device on a bus. This PID contains information about the vendor, the part and an instance ID so that several devices of the same type can be connected on the same bus. @@ -123,7 +123,7 @@ What: /sys/bus/i3c/devices/i3c-/-/pid KernelVersion: 5.0 Contact: linux-i3c@vger.kernel.org Description: - PID stands for Provisional ID and is used to uniquely identify + PID stands for Provisioned ID and is used to uniquely identify a device on a bus. This PID contains information about the vendor, the part and an instance ID so that several devices of the same type can be connected on the same bus. diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index a2854dc9a8..19cde14f38 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -279,6 +279,35 @@ Description: but should match other such assignments on device). Units after application of scale and offset are m/s^2. 
+What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_x_raw +What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_y_raw +What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_z_raw +KernelVersion: 6.5 +Contact: linux-iio@vger.kernel.org +Description: + Angular displacement between two consecutive samples on x, y or + z (may be arbitrarily assigned but should match other such + assignments on device). + In order to compute the total angular displacement during a + desired period of time, the application should sum-up the delta + angle samples acquired during that time. + Units after application of scale and offset are radians. + +What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_x_raw +What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_y_raw +What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_z_raw +KernelVersion: 6.5 +Contact: linux-iio@vger.kernel.org +Description: + The linear velocity change between two consecutive samples on x, + y or z (may be arbitrarily assigned but should match other such + assignments on device). + In order to compute the total linear velocity change during a + desired period of time, the application should sum-up the delta + velocity samples acquired during that time. + Units after application of scale and offset are meters per + second. 
+ What: /sys/bus/iio/devices/iio:deviceX/in_angl_raw What: /sys/bus/iio/devices/iio:deviceX/in_anglY_raw KernelVersion: 4.17 @@ -461,6 +490,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_scale What: /sys/bus/iio/devices/iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_scale What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_scale What: /sys/bus/iio/devices/iio:deviceX/in_countY_scale +What: /sys/bus/iio/devices/iio:deviceX/in_deltaangl_scale +What: /sys/bus/iio/devices/iio:deviceX/in_deltavelocity_scale What: /sys/bus/iio/devices/iio:deviceX/in_angl_scale What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_scale What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_scale @@ -1332,6 +1363,12 @@ Description: What: /sys/.../iio:deviceX/bufferY/in_accel_x_en What: /sys/.../iio:deviceX/bufferY/in_accel_y_en What: /sys/.../iio:deviceX/bufferY/in_accel_z_en +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_x_en +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_y_en +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_z_en +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_x_en +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_y_en +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_z_en What: /sys/.../iio:deviceX/bufferY/in_anglvel_x_en What: /sys/.../iio:deviceX/bufferY/in_anglvel_y_en What: /sys/.../iio:deviceX/bufferY/in_anglvel_z_en @@ -1362,6 +1399,8 @@ Description: Scan element control for triggered data capture. 
What: /sys/.../iio:deviceX/bufferY/in_accel_type +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_type +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_type What: /sys/.../iio:deviceX/bufferY/in_anglvel_type What: /sys/.../iio:deviceX/bufferY/in_magn_type What: /sys/.../iio:deviceX/bufferY/in_incli_type @@ -1416,6 +1455,12 @@ What: /sys/.../iio:deviceX/bufferY/in_voltage_q_index What: /sys/.../iio:deviceX/bufferY/in_accel_x_index What: /sys/.../iio:deviceX/bufferY/in_accel_y_index What: /sys/.../iio:deviceX/bufferY/in_accel_z_index +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_x_index +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_y_index +What: /sys/.../iio:deviceX/bufferY/in_deltaangl_z_index +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_x_index +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_y_index +What: /sys/.../iio:deviceX/bufferY/in_deltavelocity_z_index What: /sys/.../iio:deviceX/bufferY/in_anglvel_x_index What: /sys/.../iio:deviceX/bufferY/in_anglvel_y_index What: /sys/.../iio:deviceX/bufferY/in_anglvel_z_index @@ -2179,3 +2224,33 @@ Contact: linux-iio@vger.kernel.org Description: Number of conditions that must occur, during a running period, before an event is generated. + +What: /sys/bus/iio/devices/iio:deviceX/in_colortemp_raw +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Represents light color temperature, which measures light color + temperature in Kelvin. + +What: /sys/bus/iio/devices/iio:deviceX/in_chromaticity_x_raw +What: /sys/bus/iio/devices/iio:deviceX/in_chromaticity_y_raw +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + The x and y light color coordinate on the CIE 1931 chromaticity + diagram. 
+ +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_mag_either_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_mag_rising_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_thresh_falling_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltageY_thresh_rising_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_anglvelY_mag_rising_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_anglY_thresh_rising_label +What: /sys/bus/iio/devices/iio:deviceX/events/in_phaseY_mag_rising_label +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Optional symbolic label to a device channel event. + If a label is defined for this event add that to the event + specific attributes. This is useful for userspace to be able to + better identify an individual event. diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564 b/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564 new file mode 100644 index 0000000000..b168aa44b2 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-mcp3564 @@ -0,0 +1,53 @@ +What: /sys/bus/iio/devices/iio:deviceX/boost_current_gain +KernelVersion: 6.4 +Contact: linux-iio@vger.kernel.org +Description: + This attribute is used to set the gain of the biasing current + circuit of the Delta-Sigma modulator. The different BOOST + settings are applied to the entire modulator circuit, including + the voltage reference buffers. + +What: /sys/bus/iio/devices/iio:deviceX/boost_current_gain_available +KernelVersion: 6.4 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns a list with the possible gain values for + the current biasing circuit of the Delta-Sigma modulator. 
+ +What: /sys/bus/iio/devices/iio:deviceX/auto_zeroing_mux_enable +KernelVersion: 6.4 +Contact: linux-iio@vger.kernel.org +Description: + This attribute is used to enable the analog input multiplexer + auto-zeroing algorithm (the input multiplexer and the ADC + include an offset cancellation algorithm that cancels the offset + contribution of the ADC). When the offset cancellation algorithm + is enabled, ADC takes two conversions, one with the differential + input as VIN+/VIN-, one with VIN+/VIN- inverted. In this case the + conversion time is multiplied by two compared to the default + case where the algorithm is disabled. This technique allows the + cancellation of the ADC offset error and the achievement of + ultra-low offset without any digital calibration. The resulting + offset is the residue of the difference between the two + conversions, which is on the order of magnitude of the noise + floor. This offset is effectively canceled at every conversion, + so the residual offset error temperature drift is extremely low. + Write '1' to enable it, write '0' to disable it. + +What: /sys/bus/iio/devices/iio:deviceX/auto_zeroing_ref_enable +KernelVersion: 6.4 +Contact: linux-iio@vger.kernel.org +Description: + This attribute is used to enable the chopping algorithm for the + internal voltage reference buffer. This setting has no effect + when external voltage reference is selected. + Internal voltage reference buffer injects a certain quantity of + 1/f noise into the system that can be modulated with the + incoming input signals and can limit the SNR performance at + higher Oversampling Ratio values (over 256). To overcome this + limitation, the buffer includes an auto-zeroing algorithm that + greatly reduces (cancels out) the 1/f noise and cancels the + offset value of the reference buffer. As a result, the SNR of + the system is not affected by this 1/f noise component of the + reference buffer, even at maximum oversampling ratio values. 
+ Write '1' to enable it, write '0' to disable it. diff --git a/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210 b/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210 new file mode 100644 index 0000000000..f92c79342b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-resolver-ad2s1210 @@ -0,0 +1,27 @@ +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_max +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns the current Degradation of Signal Reset Maximum + Threshold value in millivolts. Writing sets the value. + +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_max_available +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns the allowable voltage range for + in_altvoltage0_mag_rising_reset_max. + +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_min +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns the current Degradation of Signal Reset Minimum + Threshold value in millivolts. Writing sets the value. + +What: /sys/bus/iio/devices/iio:deviceX/events/in_altvoltage0_mag_rising_reset_min_available +KernelVersion: 6.7 +Contact: linux-iio@vger.kernel.org +Description: + Reading returns the allowable voltage range for + in_altvoltage0_mag_rising_reset_min. diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 46cfe02058..34ee8c59ab 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -8,7 +8,7 @@ Description: more bits set in the dimm-health-bitmap retrieved in response to H_SCM_HEALTH hcall. The details of the bit flags returned in response to this hcall is available - at 'Documentation/powerpc/papr_hcalls.rst' . Below are + at 'Documentation/arch/powerpc/papr_hcalls.rst' . 
Below are the flags reported in this sysfs file: * "not_armed" diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index a44bfe0200..2b7108e219 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb @@ -313,6 +313,15 @@ Description: Inter-Chip SSIC devices support asymmetric lanes up to 4 lanes per direction. Devices before USB 3.2 are single lane (tx_lanes = 1) +What: /sys/bus/usb/devices/.../typec +Date: November 2023 +Contact: Heikki Krogerus +Description: + Symlink to the USB Type-C partner device. USB Type-C partner + represents the component that communicates over the + Configuration Channel (CC signal on USB Type-C connectors and + cables) with the local port. + What: /sys/bus/usb/devices/usbX/bAlternateSetting Description: The current interface alternate setting number, in decimal. diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa index 28a6111202..4da53878bf 100644 --- a/Documentation/ABI/testing/sysfs-bus-vdpa +++ b/Documentation/ABI/testing/sysfs-bus-vdpa @@ -1,4 +1,4 @@ -What: /sys/bus/vdpa/driver_autoprobe +What: /sys/bus/vdpa/drivers_autoprobe Date: March 2020 Contact: virtualization@lists.linux-foundation.org Description: @@ -17,7 +17,7 @@ Description: Writing a device name to this file will cause the kernel binds devices to a compatible driver. - This can be useful when /sys/bus/vdpa/driver_autoprobe is + This can be useful when /sys/bus/vdpa/drivers_autoprobe is disabled. What: /sys/bus/vdpa/drivers/.../bind diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes index f205d39409..9c82c7b42f 100644 --- a/Documentation/ABI/testing/sysfs-class-firmware-attributes +++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes @@ -383,6 +383,36 @@ Description: Note that any changes to this attribute requires a reboot for changes to take effect. 
+What: /sys/class/firmware-attributes/*/attributes/save_settings +Date: August 2023 +KernelVersion: 6.6 +Contact: Mark Pearson +Description: + On Lenovo platforms there is a limitation in the number of times an attribute can be + saved. This is an architectural limitation and it limits the number of attributes + that can be modified to 48. + A solution for this is instead of the attribute being saved after every modification, + to allow a user to bulk set the attributes, and then trigger a final save. This allows + unlimited attributes. + + Read the attribute to check what save mode is enabled (single or bulk). + E.g: + # cat /sys/class/firmware-attributes/thinklmi/attributes/save_settings + single + + Write the attribute with 'bulk' to enable bulk save mode. + Write the attribute with 'single' to enable saving, after every attribute set. + The default setting is single mode. + E.g: + # echo bulk > /sys/class/firmware-attributes/thinklmi/attributes/save_settings + + When in bulk mode write 'save' to trigger a save of all currently modified attributes. + Note, once a save has been triggered, in bulk mode, attributes can no longer be set and + will return a permissions error. This is to prevent users hitting the 48+ save limitation + (which requires entering the BIOS to clear the error condition) + E.g: + # echo save > /sys/class/firmware-attributes/thinklmi/attributes/save_settings + What: /sys/class/firmware-attributes/*/attributes/debug_cmd Date: July 2021 KernelVersion: 5.14 diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia index c4d46970c1..369b4ae8be 100644 --- a/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia +++ b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia @@ -12,3 +12,17 @@ Description: (RW) On the front panel of the Turris Omnia router there is also able to change this setting from software. 
Format: %i + +What: /sys/class/leds//device/gamma_correction +Date: August 2023 +KernelVersion: 6.6 +Contact: Marek Behún +Description: (RW) Newer versions of the microcontroller firmware of the + Turris Omnia router support gamma correction for the RGB LEDs. + This feature can be enabled/disabled by writing to this file. + + If the feature is not supported because the MCU firmware is too + old, the file always reads as 0, and writing to the file results + in the EOPNOTSUPP error. + + Format: %i diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 906ff3ca92..5bff64d256 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class//queues/rx-/rps_cpus +What: /sys/class/net//queues/rx-/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/rx-/rps_flow_cnt +What: /sys/class/net//queues/rx-/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class//queues/tx-/tx_timeout +What: /sys/class/net//queues/tx-/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class//queues/tx-/tx_maxrate +What: /sys/class/net//queues/tx-/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. 
-What: /sys/class//queues/tx-/xps_cpus +What: /sys/class/net//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/tx-/xps_rxqs +What: /sys/class/net//queues/tx-/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class//queues/tx-/byte_queue_limits/hold_time +What: /sys/class/net//queues/tx-/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class//queues/tx-/byte_queue_limits/inflight +What: /sys/class/net//queues/tx-/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class//queues/tx-/byte_queue_limits/limit +What: /sys/class/net//queues/tx-/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class//queues/tx-/byte_queue_limits/limit_max +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. 
-What: /sys/class//queues/tx-/byte_queue_limits/limit_min +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db278153..53e508c693 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class//statistics/collisions +What: /sys/class/net//statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class//statistics/multicast +What: /sys/class/net//statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class//statistics/rx_bytes +What: /sys/class/net//statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class//statistics/rx_compressed +What: /sys/class/net//statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class//statistics/rx_crc_errors +What: /sys/class/net//statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. 
-What: /sys/class//statistics/rx_dropped +What: /sys/class/net//statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_errors +What: /sys/class/net//statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_fifo_errors +What: /sys/class/net//statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_frame_errors +What: /sys/class/net//statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_length_errors +What: /sys/class/net//statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_missed_errors +What: /sys/class/net//statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. 
-What: /sys/class//statistics/rx_nohandler +What: /sys/class/net//statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class//statistics/rx_over_errors +What: /sys/class/net//statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_packets +What: /sys/class/net//statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class//statistics/tx_aborted_errors +What: /sys/class/net//statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_bytes +What: /sys/class/net//statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class//statistics/tx_carrier_errors +What: /sys/class/net//statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_compressed +What: /sys/class/net//statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). 
-What: /sys/class//statistics/tx_dropped +What: /sys/class/net//statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_errors +What: /sys/class/net//statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_fifo_errors +What: /sys/class/net//statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_heartbeat_errors +What: /sys/class/net//statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_packets +What: /sys/class/net//statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class//statistics/tx_window_errors +What: /sys/class/net//statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org diff --git a/Documentation/ABI/testing/sysfs-class-usb_power_delivery b/Documentation/ABI/testing/sysfs-class-usb_power_delivery index 1bf9d1d790..61d233c320 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_power_delivery +++ b/Documentation/ABI/testing/sysfs-class-usb_power_delivery @@ -124,6 +124,13 @@ Contact: Heikki Krogerus Description: The voltage the supply supports in millivolts. 
+What: /sys/class/usb_power_delivery/.../source-capabilities/:fixed_supply/peak_current +Date: October 2023 +Contact: Heikki Krogerus +Description: + This file shows the value of the Fixed Power Source Peak Current + Capability field. + What: /sys/class/usb_power_delivery/.../source-capabilities/:fixed_supply/maximum_current Date: May 2022 Contact: Heikki Krogerus diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 7ecd5c8161..a1db6db475 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -354,9 +354,6 @@ Description: Parameters for the CPU cache attributes - ReadWriteAllocate: both writeallocate and readallocate - attributes: - LEGACY used only on IA64 and is same as write_policy - coherency_line_size: the minimum amount of data in bytes that gets transferred from memory to cache diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 1b98b6503b..c63ca1ad50 100644 --- a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -1,4 +1,4 @@ -What: /sys/class/habanalabs/hl/armcp_kernel_ver +What: /sys/class/accel/accel/device/armcp_kernel_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -6,7 +6,7 @@ Description: Version of the Linux kernel running on the device's CPU. 
Will be DEPRECATED in Linux kernel version 5.10, and be replaced with cpucp_kernel_ver -What: /sys/class/habanalabs/hl/armcp_ver +What: /sys/class/accel/accel/device/armcp_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -14,7 +14,7 @@ Description: Version of the application running on the device's CPU Will be DEPRECATED in Linux kernel version 5.10, and be replaced with cpucp_ver -What: /sys/class/habanalabs/hl/clk_max_freq_mhz +What: /sys/class/accel/accel/device/clk_max_freq_mhz Date: Jun 2019 KernelVersion: 5.7 Contact: ogabbay@kernel.org @@ -24,58 +24,58 @@ Description: Allows the user to set the maximum clock frequency, in MHz. frequency value of the device clock. This property is valid only for the Gaudi ASIC family -What: /sys/class/habanalabs/hl/clk_cur_freq_mhz +What: /sys/class/accel/accel/device/clk_cur_freq_mhz Date: Jun 2019 KernelVersion: 5.7 Contact: ogabbay@kernel.org Description: Displays the current frequency, in MHz, of the device clock. This property is valid only for the Gaudi ASIC family -What: /sys/class/habanalabs/hl/cpld_ver +What: /sys/class/accel/accel/device/cpld_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the Device's CPLD F/W -What: /sys/class/habanalabs/hl/cpucp_kernel_ver +What: /sys/class/accel/accel/device/cpucp_kernel_ver Date: Oct 2020 KernelVersion: 5.10 Contact: ogabbay@kernel.org Description: Version of the Linux kernel running on the device's CPU -What: /sys/class/habanalabs/hl/cpucp_ver +What: /sys/class/accel/accel/device/cpucp_ver Date: Oct 2020 KernelVersion: 5.10 Contact: ogabbay@kernel.org Description: Version of the application running on the device's CPU -What: /sys/class/habanalabs/hl/device_type +What: /sys/class/accel/accel/device/device_type Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the code name of the device according to its type. 
The supported values are: "GOYA" -What: /sys/class/habanalabs/hl/eeprom +What: /sys/class/accel/accel/device/eeprom Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: A binary file attribute that contains the contents of the on-board EEPROM -What: /sys/class/habanalabs/hl/fuse_ver +What: /sys/class/accel/accel/device/fuse_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the device's version from the eFuse -What: /sys/class/habanalabs/hl/fw_os_ver +What: /sys/class/accel/accel/device/fw_os_ver Date: Dec 2021 KernelVersion: 5.18 Contact: ogabbay@kernel.org Description: Version of the firmware OS running on the device's CPU -What: /sys/class/habanalabs/hl/hard_reset +What: /sys/class/accel/accel/device/hard_reset Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -83,14 +83,14 @@ Description: Interface to trigger a hard-reset operation for the device. Hard-reset will reset ALL internal components of the device except for the PCI interface and the internal PLLs -What: /sys/class/habanalabs/hl/hard_reset_cnt +What: /sys/class/accel/accel/device/hard_reset_cnt Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays how many times the device have undergone a hard-reset operation since the driver was loaded -What: /sys/class/habanalabs/hl/high_pll +What: /sys/class/accel/accel/device/high_pll Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -98,7 +98,7 @@ Description: Allows the user to set the maximum clock frequency for MME, TPC and IC when the power management profile is set to "automatic". This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/ic_clk +What: /sys/class/accel/accel/device/ic_clk Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -110,27 +110,27 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of frequency value of the IC. 
This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/ic_clk_curr +What: /sys/class/accel/accel/device/ic_clk_curr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the current clock frequency, in Hz, of the Interconnect fabric. This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/infineon_ver +What: /sys/class/accel/accel/device/infineon_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI -What: /sys/class/habanalabs/hl/max_power +What: /sys/class/accel/accel/device/max_power Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Allows the user to set the maximum power consumption of the device in milliwatts. -What: /sys/class/habanalabs/hl/mme_clk +What: /sys/class/accel/accel/device/mme_clk Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -142,21 +142,21 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of frequency value of the MME. This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/mme_clk_curr +What: /sys/class/accel/accel/device/mme_clk_curr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the current clock frequency, in Hz, of the MME compute engine. This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/pci_addr +What: /sys/class/accel/accel/device/pci_addr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the PCI address of the device. This is needed so the user would be able to open a device based on its PCI address -What: /sys/class/habanalabs/hl/pm_mng_profile +What: /sys/class/accel/accel/device/pm_mng_profile Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -170,19 +170,19 @@ Description: Power management profile. Values are "auto", "manual". 
In "auto" ic_clk, mme_clk and tpc_clk. This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/preboot_btl_ver +What: /sys/class/accel/accel/device/preboot_btl_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the device's preboot F/W code -What: /sys/class/habanalabs/hl/security_enabled +What: /sys/class/accel/accel/device/security_enabled Date: Oct 2022 KernelVersion: 6.1 Contact: obitton@habana.ai Description: Displays the device's security status -What: /sys/class/habanalabs/hl/soft_reset +What: /sys/class/accel/accel/device/soft_reset Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -190,14 +190,14 @@ Description: Interface to trigger a soft-reset operation for the device. Soft-reset will reset only the compute and DMA engines of the device -What: /sys/class/habanalabs/hl/soft_reset_cnt +What: /sys/class/accel/accel/device/soft_reset_cnt Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays how many times the device have undergone a soft-reset operation since the driver was loaded -What: /sys/class/habanalabs/hl/status +What: /sys/class/accel/accel/device/status Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -215,13 +215,13 @@ Description: Status of the card: a compute-reset which is executed after a device release (relevant for Gaudi2 only). -What: /sys/class/habanalabs/hl/thermal_ver +What: /sys/class/accel/accel/device/thermal_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the Device's thermal daemon -What: /sys/class/habanalabs/hl/tpc_clk +What: /sys/class/accel/accel/device/tpc_clk Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -233,20 +233,20 @@ Description: Allows the user to set the maximum clock frequency, in Hz, of frequency value of the TPC. 
This property is valid only for Goya ASIC family -What: /sys/class/habanalabs/hl/tpc_clk_curr +What: /sys/class/accel/accel/device/tpc_clk_curr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays the current clock frequency, in Hz, of the TPC compute engines. This property is valid only for the Goya ASIC family -What: /sys/class/habanalabs/hl/uboot_ver +What: /sys/class/accel/accel/device/uboot_ver Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the u-boot running on the device's CPU -What: /sys/class/habanalabs/hl/vrm_ver +What: /sys/class/accel/accel/device/vrm_ver Date: Jan 2022 KernelVersion: 5.17 Contact: ogabbay@kernel.org diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc index a8ab58035c..c12316dfd9 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc +++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc @@ -17,7 +17,7 @@ Description: Read only. Returns the firmware version of Intel MAX10 What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address Date: January 2021 KernelVersion: 5.12 -Contact: Russ Weight +Contact: Peter Colberg Description: Read only. Returns the first MAC address in a block of sequential MAC addresses assigned to the board that is managed by the Intel MAX10 BMC. It is stored in @@ -28,7 +28,7 @@ Description: Read only. Returns the first MAC address in a block What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count Date: January 2021 KernelVersion: 5.12 -Contact: Russ Weight +Contact: Peter Colberg Description: Read only. Returns the number of sequential MAC addresses assigned to the board managed by the Intel MAX10 BMC. 
This value is stored in FLASH and is mirrored diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat index 96834d103a..bbf329cf0d 100644 --- a/Documentation/ABI/testing/sysfs-driver-qat +++ b/Documentation/ABI/testing/sysfs-driver-qat @@ -95,3 +95,49 @@ Description: (RW) This configuration option provides a way to force the device i 0 This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat/rp2srv +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) This attribute provides a way for a user to query a + specific ring pair for the type of service that it is currently + configured for. + + When written to, the value is cached and used to perform the + read operation. Allowed values are in the range 0 to N-1, where + N is the max number of ring pairs supported by a device. This + can be queried using the attribute qat/num_rps. + + A read returns the service associated to the ring pair queried. + + The values are: + + * dc: the ring pair is configured for running compression services + * sym: the ring pair is configured for running symmetric crypto + services + * asym: the ring pair is configured for running asymmetric crypto + services + + Example usage:: + + # echo 1 > /sys/bus/pci/devices//qat/rp2srv + # cat /sys/bus/pci/devices//qat/rp2srv + sym + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat/num_rps +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RO) Returns the number of ring pairs that a single device has. + + Example usage:: + + # cat /sys/bus/pci/devices//qat/num_rps + 64 + + This attribute is only available for qat_4xxx devices. 
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_ras b/Documentation/ABI/testing/sysfs-driver-qat_ras new file mode 100644 index 0000000000..176dea1e9c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-qat_ras @@ -0,0 +1,41 @@ +What: /sys/bus/pci/devices//qat_ras/errors_correctable +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (RO) Reports the number of correctable errors detected by the device. + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_ras/errors_nonfatal +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (RO) Reports the number of non fatal errors detected by the device. + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_ras/errors_fatal +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (RO) Reports the number of fatal errors detected by the device. + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_ras/reset_error_counters +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: (WO) Writing to this attribute resets all error counters of a device. + + The following example shows how to reset the counters:: + + # echo 1 > /sys/bus/pci/devices//qat_ras/reset_error_counters + # cat /sys/bus/pci/devices//qat_ras/errors_correctable + 0 + # cat /sys/bus/pci/devices//qat_ras/errors_nonfatal + 0 + # cat /sys/bus/pci/devices//qat_ras/errors_fatal + 0 + + This attribute is only available for qat_4xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_rl b/Documentation/ABI/testing/sysfs-driver-qat_rl new file mode 100644 index 0000000000..8c282ae315 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-qat_rl @@ -0,0 +1,226 @@ +What: /sys/bus/pci/devices//qat_rl/sla_op +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (WO) This attribute is used to perform an operation on an SLA. + The supported operations are: add, update, rm, rm_all, and get. + + Input values must be filled through the associated attribute in + this group before a write to this file. + If the operation completes successfully, the associated + attributes will be updated. + The associated attributes are: cir, pir, srv, rp, and id. + + Supported operations: + + * add: Creates a new SLA with the provided inputs from user. + * Inputs: cir, pir, srv, and rp + * Output: id + + * get: Returns the configuration of the specified SLA in id attribute + * Inputs: id + * Outputs: cir, pir, srv, and rp + + * update: Updates the SLA with new values set in the following attributes + * Inputs: id, cir, and pir + + * rm: Removes the specified SLA in the id attribute. + * Inputs: id + + * rm_all: Removes all the configured SLAs. + * Inputs: None + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/rp +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) When read, reports the currently assigned ring pairs for the + queried SLA. + When written to, configures the ring pairs associated to a new SLA. + + The value is a 64-bit bit mask and is written/displayed in hex. + Each bit of this mask represents a single ring pair i.e., + bit 1 == ring pair id 0; bit 3 == ring pair id 2. + + Selected ring pairs must be assigned to a single service, + i.e. the one provided with the srv attribute. The service + assigned to a certain ring pair can be checked by querying + the attribute qat/rp2srv.
+ + The maximum number of ring pairs is 4 per SLA. + + Applicability in sla_op: + + * WRITE: add operation + * READ: get operation + + Example usage:: + + ## Read + # echo 4 > /sys/bus/pci/devices//qat_rl/id + # cat /sys/bus/pci/devices//qat_rl/rp + 0x5 + + ## Write + # echo 0x5 > /sys/bus/pci/devices//qat_rl/rp + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/id +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) If written to, the value is used to retrieve a particular + SLA and operate on it. + This is valid only for the following operations: update, rm, + and get. + A read of this attribute is only guaranteed to have correct data + after creation of an SLA. + + Applicability in sla_op: + + * WRITE: rm and update operations + * READ: add and get operations + + Example usage:: + + ## Read + ## Set attributes e.g. cir, pir, srv, etc + # echo "add" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/id + 4 + + ## Write + # echo 7 > /sys/bus/pci/devices//qat_rl/id + # echo "get" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/rp + 0x5 ## ring pair ID 0 and ring pair ID 2 + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/cir +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) Committed information rate (CIR). Rate guaranteed to be + achieved by a particular SLA. The value is expressed in + permille scale, i.e. 1000 refers to the maximum device + throughput for a selected service. + + After sending a "get" to sla_op, this will be populated with the + CIR for that queried SLA. + Write to this file before sending an "add/update" sla_op, to set + the SLA to the specified value. 
+ + Applicability in sla_op: + + * WRITE: add and update operations + * READ: get operation + + Example usage:: + + ## Write + # echo 500 > /sys/bus/pci/devices//qat_rl/cir + # echo "add" > /sys/bus/pci/devices//qat_rl/sla_op + + ## Read + # echo 4 > /sys/bus/pci/devices//qat_rl/id + # echo "get" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/cir + 500 + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/pir +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) Peak information rate (PIR). The maximum rate that can be + achieved by that particular SLA. An SLA can reach a value + between CIR and PIR when the device is not fully utilized by + requests from other users (assigned to different SLAs). + + After sending a "get" to sla_op, this will be populated with the + PIR for that queried SLA. + Write to this file before sending an "add/update" sla_op, to set + the SLA to the specified value. + + Applicability in sla_op: + + * WRITE: add and update operations + * READ: get operation + + Example usage:: + + ## Write + # echo 750 > /sys/bus/pci/devices//qat_rl/pir + # echo "add" > /sys/bus/pci/devices//qat_rl/sla_op + + ## Read + # echo 4 > /sys/bus/pci/devices//qat_rl/id + # echo "get" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/pir + 750 + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/srv +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) Service (SRV). Represents the service (sym, asym, dc) + associated to an SLA. + Can be written to or queried to set/show the SRV type for an SLA. + The SRV attribute is used to specify the SRV type before adding + an SLA. After an SLA is configured, reports the service + associated to that SLA.
+ + Applicability in sla_op: + + * WRITE: add and update operations + * READ: get operation + + Example usage:: + + ## Write + # echo "dc" > /sys/bus/pci/devices//qat_rl/srv + # echo "add" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/id + 4 + + ## Read + # echo 4 > /sys/bus/pci/devices//qat_rl/id + # echo "get" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/srv + dc + + This attribute is only available for qat_4xxx devices. + +What: /sys/bus/pci/devices//qat_rl/cap_rem +Date: January 2024 +KernelVersion: 6.7 +Contact: qat-linux@intel.com +Description: + (RW) This file will return the remaining capability for a + particular service/sla. This is the remaining value that a new + SLA can be set to or a current SLA can be increased with. + + Example usage:: + + # echo "asym" > /sys/bus/pci/devices//qat_rl/cap_rem + # cat /sys/bus/pci/devices//qat_rl/cap_rem + 250 + # echo 250 > /sys/bus/pci/devices//qat_rl/cir + # echo "add" > /sys/bus/pci/devices//qat_rl/sla_op + # cat /sys/bus/pci/devices//qat_rl/cap_rem + 0 + + This attribute is only available for qat_4xxx devices. diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi-entries b/Documentation/ABI/testing/sysfs-firmware-dmi-entries index fe0289c877..b6c23807b8 100644 --- a/Documentation/ABI/testing/sysfs-firmware-dmi-entries +++ b/Documentation/ABI/testing/sysfs-firmware-dmi-entries @@ -2,7 +2,7 @@ What: /sys/firmware/dmi/entries/ Date: February 2011 Contact: Mike Waychison Description: - Many machines' firmware (x86 and ia64) export DMI / + Many machines' firmware (x86 and arm64) export DMI / SMBIOS tables to the operating system. Getting at this information is often valuable to userland, especially in cases where there are OEM extensions used. 
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon index 420b30f09c..b35649a46a 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-damon +++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon @@ -151,6 +151,13 @@ Contact: SeongJae Park Description: Writing to and reading from this file sets and gets the action of the scheme. +What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//apply_interval_us +Date: Sep 2023 +Contact: SeongJae Park +Description: Writing a value to this file sets the action apply interval of + the scheme in microseconds. Reading this file returns the + value. + What: /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//access_pattern/sz/min Date: Mar 2022 Contact: SeongJae Park diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index 820e412d38..895c47f05f 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -87,19 +87,22 @@ What: /sys/class/tty/ttyS/close_delay Date: October 2012 Contact: Alan Cox Description: - Show the closing delay time for this port in ms. + Show the closing delay time for this port in centiseconds. - These sysfs values expose the TIOCGSERIAL interface via - sysfs rather than via ioctls. + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. What: /sys/class/tty/ttyS/closing_wait Date: October 2012 Contact: Alan Cox Description: - Show the close wait time for this port in ms. + Show the close wait time for this port in centiseconds. - These sysfs values expose the TIOCGSERIAL interface via - sysfs rather than via ioctls. + Waiting forever is represented as 0. If waiting on close is + disabled then the value is 65535. + + These sysfs values expose the TIOCGSERIAL interface via + sysfs rather than via ioctls. 
What: /sys/class/tty/ttyS/custom_divisor Date: October 2012 diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 0c7552a00c..42e1e78353 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -364,7 +364,7 @@ Note, however, not all failures are truly "permanent". Some are caused by over-heating, some by a poorly seated card. Many PCI error events are caused by software bugs, e.g. DMAs to wild addresses or bogus split transactions due to programming -errors. See the discussion in Documentation/powerpc/eeh-pci-error-recovery.rst +errors. See the discussion in Documentation/arch/powerpc/eeh-pci-error-recovery.rst for additional detail on real-life experience of the causes of software errors. @@ -404,7 +404,7 @@ That is, the recovery API only requires that: .. note:: Implementation details for the powerpc platform are discussed in - the file Documentation/powerpc/eeh-pci-error-recovery.rst + the file Documentation/arch/powerpc/eeh-pci-error-recovery.rst As of this writing, there is a growing list of device drivers with patches implementing error recovery. Not all of these patches are in diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst index 93d899d532..414f8a2012 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst @@ -181,7 +181,7 @@ operations is carried out at several levels: of this wait (or series of waits, as the case may be) is to permit a concurrent CPU-hotplug operation to complete. #. In the case of RCU-sched, one of the last acts of an outgoing CPU is - to invoke ``rcu_report_dead()``, which reports a quiescent state for + to invoke ``rcutree_report_cpu_dead()``, which reports a quiescent state for that CPU. 
However, this is likely paranoia-induced redundancy. +-----------------------------------------------------------------------+ diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg index 7ac6f92698..63eff86717 100644 --- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg +++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-registry.svg @@ -564,15 +564,6 @@ font-size="192" id="text202-7-9-6" style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_migrate_callbacks() - rcu_migrate_callbacks() rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() rcutree_migrate_callbacks() - rcu_migrate_callbacks() rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() \ --append="root= " -Please note, that --args-linux does not need to be specified for ia64. -It is planned to make this a no-op on that architecture, but for now -it should be omitted - Following are the arch specific command line options to be used while loading dump-capture kernel. 
-For i386, x86_64 and ia64: +For i386 and x86_64: "1 irqpoll nr_cpus=1 reset_devices" diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 599e8d3bcb..78e4d2e7ba 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -413,36 +413,6 @@ of a higher page table lookup overhead, and also consumes more page table space per process. Used to check whether PAE was enabled in the crash kernel when converting virtual addresses to physical addresses. -ia64 -==== - -pgdat_list|(pgdat_list, MAX_NUMNODES) -------------------------------------- - -pg_data_t array storing all NUMA nodes information. MAX_NUMNODES -indicates the number of the nodes. - -node_memblk|(node_memblk, NR_NODE_MEMBLKS) ------------------------------------------- - -List of node memory chunks. Filled when parsing the SRAT table to obtain -information about memory nodes. NR_NODE_MEMBLKS indicates the number of -node memory chunks. - -These values are used to compute the number of nodes the crashed kernel used. - -node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size) ----------------------------------------------------------------- - -The size of a struct node_memblk_s and the offsets of the -node_memblk_s's members. Used to compute the number of nodes. - -PGTABLE_3|PGTABLE_4 -------------------- - -User-space tools need to know whether the crash kernel was in 3-level or -4-level paging mode. Used to distinguish the page table. - ARM64 ===== diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 41644336e3..b72e2049c4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -580,6 +580,10 @@ named mounts. Specifying both "all" and "named" disables all v1 hierarchies. + cgroup_favordynmods= [KNL] Enable or Disable favordynmods. 
+ Format: { "true" | "false" } + Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS. + cgroup.memory= [KNL] Pass options to the cgroup memory controller. Format: nosocket -- Disable socket memory accounting. @@ -1331,6 +1335,7 @@ earlyprintk=dbgp[debugController#] earlyprintk=pciserial[,force],bus:device.function[,baudrate] earlyprintk=xdbc[xhciController#] + earlyprintk=bios earlyprintk is useful when the kernel crashes before the normal console is initialized. It is not enabled by @@ -1361,6 +1366,8 @@ The sclp output can only be used on s390. + The bios output can only be used on SuperH. + The optional "force" to "pciserial" enables use of a PCI device even when its classcode is not of the UART class. @@ -1449,7 +1456,7 @@ See comment before function elanfreq_setup() in arch/x86/kernel/cpu/cpufreq/elanfreq.c. - elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390] + elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390] Specifies physical address of start of kernel core image elf header and optionally the size. Generally kexec loader will pass this option to capture kernel. @@ -1512,12 +1519,6 @@ floppy= [HW] See Documentation/admin-guide/blockdev/floppy.rst. - force_pal_cache_flush - [IA-64] Avoid check_sal_cache_flush which may hang on - buggy SAL_CACHE_FLUSH implementations. Using this - parameter will force ia64_sal_cache_flush to call - ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. - forcepae [X86-32] Forcefully enable Physical Address Extension (PAE). Many Pentium M systems disable PAE but may have a @@ -1893,6 +1894,12 @@ 0 -- machine default 1 -- force brightness inversion + ia32_emulation= [X86-64] + Format: + When true, allows loading 32-bit programs and executing 32-bit + syscalls, essentially overriding IA32_EMULATION_DEFAULT_DISABLED at + boot time. When false, unconditionally disables IA32 emulation. + icn= [HW,ISDN] Format: [,[,[,]]] @@ -2220,7 +2227,7 @@ forcing Dual Address Cycle for PCI cards supporting greater than 32-bit addressing. 
- iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour + iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour Format: { "0" | "1" } 0 - Lazy mode. Request that DMA unmap operations use deferred @@ -2913,6 +2920,38 @@ to extract confidential information from the kernel are also disabled. + locktorture.acq_writer_lim= [KNL] + Set the time limit in jiffies for a lock + acquisition. Acquisitions exceeding this limit + will result in a splat once they do complete. + + locktorture.bind_readers= [KNL] + Specify the list of CPUs to which the readers are + to be bound. + + locktorture.bind_writers= [KNL] + Specify the list of CPUs to which the writers are + to be bound. + + locktorture.call_rcu_chains= [KNL] + Specify the number of self-propagating call_rcu() + chains to set up. These are used to ensure that + there is a high probability of an RCU grace period + in progress at any given time. Defaults to 0, + which disables these call_rcu() chains. + + locktorture.long_hold= [KNL] + Specify the duration in milliseconds for the + occasional long-duration lock hold time. Defaults + to 100 milliseconds. Select 0 to disable. + + locktorture.nested_locks= [KNL] + Specify the maximum lock nesting depth that + locktorture is to exercise, up to a limit of 8 + (MAX_NESTED_LOCKS). Specify zero to disable. + Note that this parameter is ineffective on types + of locks that do not support nested acquisition. + locktorture.nreaders_stress= [KNL] Set the number of locking read-acquisition kthreads. Defaults to being automatically set based on the @@ -2928,6 +2967,25 @@ Set time (s) between CPU-hotplug operations, or zero to disable CPU-hotplug testing. + locktorture.rt_boost= [KNL] + Do periodic testing of real-time lock priority + boosting. Select 0 to disable, 1 to boost + only rt_mutex, and 2 to boost unconditionally. 
+ Defaults to 2, which might seem to be an + odd choice, but which should be harmless for + non-real-time spinlocks, due to their disabling + of preemption. Note that non-realtime mutexes + disable boosting. + + locktorture.rt_boost_factor= [KNL] + Number that determines how often and for how + long priority boosting is exercised. This is + scaled down by the number of writers, so that the + number of boosts per unit time remains roughly + constant as the number of writers increases. + On the other hand, the duration of each boost + increases with the number of writers. + locktorture.shuffle_interval= [KNL] Set task-shuffle interval (jiffies). Shuffling tasks allows some CPUs to go into dyntick-idle @@ -2950,13 +3008,13 @@ locktorture.torture_type= [KNL] Specify the locking implementation to test. + locktorture.verbose= [KNL] + Enable additional printk() statements. + locktorture.writer_fifo= [KNL] Run the write-side locktorture kthreads at sched_set_fifo() real-time priority. - locktorture.verbose= [KNL] - Enable additional printk() statements. - logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver Format: @@ -3275,6 +3333,11 @@ mga= [HW,DRM] + microcode.force_minrev= [X86] + Format: + Enable or disable the microcode minimal revision + enforcement for the runtime microcode loader. + min_addr=nn[KMG] [KNL,BOOT,IA-64] All physical memory below this physical address is ignored. @@ -3533,6 +3596,13 @@ [NFS] set the TCP port on which the NFSv4 callback channel should listen. + nfs.delay_retrans= + [NFS] specifies the number of times the NFSv4 client + retries the request before returning an EAGAIN error, + after a reply of NFS4ERR_DELAY from the server. + Only applies if the softerr mount option is enabled, + and the specified value is >= 0. + nfs.enable_ino64= [NFS] enable 64-bit inode numbers. If zero, the NFS client will fake up a 32-bit inode @@ -4769,6 +4839,13 @@ Set maximum number of finished RCU callbacks to process in one batch. 
+ rcutree.do_rcu_barrier= [KNL] + Request a call to rcu_barrier(). This is + throttled so that userspace tests can safely + hammer on the sysfs variable if they so choose. + If triggered before the RCU grace-period machinery + is fully active, this will error out with EAGAIN. + rcutree.dump_tree= [KNL] Dump the structure of the rcu_node combining tree out at early boot. This is used for diagnostic @@ -5225,6 +5302,12 @@ Dump ftrace buffer after reporting RCU CPU stall warning. + rcupdate.rcu_cpu_stall_notifiers= [KNL] + Provide RCU CPU stall notifiers, but see the + warnings in the RCU_CPU_STALL_NOTIFIER Kconfig + option's help text. TL;DR: You almost certainly + do not want rcupdate.rcu_cpu_stall_notifiers. + rcupdate.rcu_cpu_stall_suppress= [KNL] Suppress RCU CPU stall warning messages. @@ -5422,6 +5505,12 @@ test until boot completes in order to avoid interference. + refscale.lookup_instances= [KNL] + Number of data elements to use for the forms of + SLAB_TYPESAFE_BY_RCU testing. A negative number + is negated and multiplied by nr_cpu_ids, while + zero specifies nr_cpu_ids. + refscale.loops= [KNL] Set the number of loops over the synchronization primitive under test. Increasing this number @@ -5611,9 +5700,10 @@ s390_iommu= [HW,S390] Set s390 IOTLB flushing mode strict - With strict flushing every unmap operation will result in - an IOTLB flush. Default is lazy flushing before reuse, - which is faster. + With strict flushing every unmap operation will result + in an IOTLB flush. Default is lazy flushing before + reuse, which is faster. Deprecated, equivalent to + iommu.strict=1. 
s390_iommu_aperture= [KNL,S390] Specifies the size of the per device DMA address space diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst index e27a1c3f63..98d3040101 100644 --- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst +++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst @@ -53,6 +53,7 @@ detailed description): - Lap mode sensor - Setting keyboard language - WWAN Antenna type + - Auxmac A compatibility table by model and feature is maintained on the web site, http://ibm-acpi.sf.net/. I appreciate any success or failure @@ -1511,6 +1512,25 @@ Currently 2 antenna types are supported as mentioned below: The property is read-only. If the platform doesn't have support the sysfs class is not created. +Auxmac +------ + +sysfs: auxmac + +Some newer Thinkpads have a feature called MAC Address Pass-through. This +feature is implemented by the system firmware to provide a system unique MAC, +that can override a dock or USB ethernet dongle MAC, when connected to a +network. This property enables user-space to easily determine the MAC address +if the feature is enabled. + +The values of this auxiliary MAC are: + + cat /sys/devices/platform/thinkpad_acpi/auxmac + +If the feature is disabled, the value will be 'disabled'. + +This property is read-only. + Adaptive keyboard ----------------- diff --git a/Documentation/admin-guide/media/mgb4.rst b/Documentation/admin-guide/media/mgb4.rst new file mode 100644 index 0000000000..2977f74d7e --- /dev/null +++ b/Documentation/admin-guide/media/mgb4.rst @@ -0,0 +1,374 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +mgb4 sysfs interface +==================== + +The mgb4 driver provides a sysfs interface, that is used to configure video +stream related parameters (some of them must be set properly before the v4l2 +device can be opened) and obtain the video device/stream status. 
+ +There are two types of parameters - global / PCI card related, found under +``/sys/class/video4linux/videoX/device`` and module specific found under +``/sys/class/video4linux/videoX``. + + +Global (PCI card) parameters +============================ + +**module_type** (R): + Module type. + + | 0 - No module present + | 1 - FPDL3 + | 2 - GMSL + +**module_version** (R): + Module version number. Zero in case of a missing module. + +**fw_type** (R): + Firmware type. + + | 1 - FPDL3 + | 2 - GMSL + +**fw_version** (R): + Firmware version number. + +**serial_number** (R): + Card serial number. The format is:: + + PRODUCT-REVISION-SERIES-SERIAL + + where each component is a 8b number. + + +Common FPDL3/GMSL input parameters +================================== + +**input_id** (R): + Input number ID, zero based. + +**oldi_lane_width** (RW): + Number of deserializer output lanes. + + | 0 - single + | 1 - dual (default) + +**color_mapping** (RW): + Mapping of the incoming bits in the signal to the colour bits of the pixels. + + | 0 - OLDI/JEIDA + | 1 - SPWG/VESA (default) + +**link_status** (R): + Video link status. If the link is locked, chips are properly connected and + communicating at the same speed and protocol. The link can be locked without + an active video stream. + + A value of 0 is equivalent to the V4L2_IN_ST_NO_SYNC flag of the V4L2 + VIDIOC_ENUMINPUT status bits. + + | 0 - unlocked + | 1 - locked + +**stream_status** (R): + Video stream status. A stream is detected if the link is locked, the input + pixel clock is running and the DE signal is moving. + + A value of 0 is equivalent to the V4L2_IN_ST_NO_SIGNAL flag of the V4L2 + VIDIOC_ENUMINPUT status bits. + + | 0 - not detected + | 1 - detected + +**video_width** (R): + Video stream width. This is the actual width as detected by the HW. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in the width + field of the v4l2_bt_timings struct. + +**video_height** (R): + Video stream height. 
This is the actual height as detected by the HW. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in the height + field of the v4l2_bt_timings struct. + +**vsync_status** (R): + The type of VSYNC pulses as detected by the video format detector. + + The value is equivalent to the flags returned by VIDIOC_QUERY_DV_TIMINGS in + the polarities field of the v4l2_bt_timings struct. + + | 0 - active low + | 1 - active high + | 2 - not available + +**hsync_status** (R): + The type of HSYNC pulses as detected by the video format detector. + + The value is equivalent to the flags returned by VIDIOC_QUERY_DV_TIMINGS in + the polarities field of the v4l2_bt_timings struct. + + | 0 - active low + | 1 - active high + | 2 - not available + +**vsync_gap_length** (RW): + If the incoming video signal does not contain synchronization VSYNC and + HSYNC pulses, these must be generated internally in the FPGA to achieve + the correct frame ordering. This value indicates, how many "empty" pixels + (pixels with deasserted Data Enable signal) are necessary to generate the + internal VSYNC pulse. + +**hsync_gap_length** (RW): + If the incoming video signal does not contain synchronization VSYNC and + HSYNC pulses, these must be generated internally in the FPGA to achieve + the correct frame ordering. This value indicates, how many "empty" pixels + (pixels with deasserted Data Enable signal) are necessary to generate the + internal HSYNC pulse. The value must be greater than 1 and smaller than + vsync_gap_length. + +**pclk_frequency** (R): + Input pixel clock frequency in kHz. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the pixelclock field of the v4l2_bt_timings struct. + + *Note: The frequency_range parameter must be set properly first to get + a valid frequency here.* + +**hsync_width** (R): + Width of the HSYNC signal in PCLK clock ticks. 
+ + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the hsync field of the v4l2_bt_timings struct. + +**vsync_width** (R): + Width of the VSYNC signal in PCLK clock ticks. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the vsync field of the v4l2_bt_timings struct. + +**hback_porch** (R): + Number of PCLK pulses between deassertion of the HSYNC signal and the first + valid pixel in the video line (marked by DE=1). + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the hbackporch field of the v4l2_bt_timings struct. + +**hfront_porch** (R): + Number of PCLK pulses between the end of the last valid pixel in the video + line (marked by DE=1) and assertion of the HSYNC signal. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the hfrontporch field of the v4l2_bt_timings struct. + +**vback_porch** (R): + Number of video lines between deassertion of the VSYNC signal and the video + line with the first valid pixel (marked by DE=1). + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the vbackporch field of the v4l2_bt_timings struct. + +**vfront_porch** (R): + Number of video lines between the end of the last valid pixel line (marked + by DE=1) and assertion of the VSYNC signal. + + The value is identical to what VIDIOC_QUERY_DV_TIMINGS returns in + the vfrontporch field of the v4l2_bt_timings struct. + +**frequency_range** (RW) + PLL frequency range of the OLDI input clock generator. The PLL frequency is + derived from the Pixel Clock Frequency (PCLK) and is equal to PCLK if + oldi_lane_width is set to "single" and PCLK/2 if oldi_lane_width is set to + "dual". + + | 0 - PLL < 50MHz (default) + | 1 - PLL >= 50MHz + + *Note: This parameter can not be changed while the input v4l2 device is + open.* + + +Common FPDL3/GMSL output parameters +=================================== + +**output_id** (R): + Output number ID, zero based. 
+ +**video_source** (RW): + Output video source. If set to 0 or 1, the source is the corresponding card + input and the v4l2 output devices are disabled. If set to 2 or 3, the source + is the corresponding v4l2 video output device. The default is + the corresponding v4l2 output, i.e. 2 for OUT1 and 3 for OUT2. + + | 0 - input 0 + | 1 - input 1 + | 2 - v4l2 output 0 + | 3 - v4l2 output 1 + + *Note: This parameter can not be changed while ANY of the input/output v4l2 + devices is open.* + +**display_width** (RW): + Display width. There is no autodetection of the connected display, so the + proper value must be set before the start of streaming. The default width + is 1280. + + *Note: This parameter can not be changed while the output v4l2 device is + open.* + +**display_height** (RW): + Display height. There is no autodetection of the connected display, so the + proper value must be set before the start of streaming. The default height + is 640. + + *Note: This parameter can not be changed while the output v4l2 device is + open.* + +**frame_rate** (RW): + Output video frame rate in frames per second. The default frame rate is + 60Hz. + +**hsync_polarity** (RW): + HSYNC signal polarity. + + | 0 - active low (default) + | 1 - active high + +**vsync_polarity** (RW): + VSYNC signal polarity. + + | 0 - active low (default) + | 1 - active high + +**de_polarity** (RW): + DE signal polarity. + + | 0 - active low + | 1 - active high (default) + +**pclk_frequency** (RW): + Output pixel clock frequency. Allowed values are between 25000-190000(kHz) + and there is a non-linear stepping between two consecutive allowed + frequencies. The driver finds the nearest allowed frequency to the given + value and sets it. When reading this property, you get the exact + frequency set by the driver. The default frequency is 70000kHz. + + *Note: This parameter can not be changed while the output v4l2 device is + open.* + +**hsync_width** (RW): + Width of the HSYNC signal in pixels. 
The default value is 16. + +**vsync_width** (RW): + Width of the VSYNC signal in video lines. The default value is 2. + +**hback_porch** (RW): + Number of PCLK pulses between deassertion of the HSYNC signal and the first + valid pixel in the video line (marked by DE=1). The default value is 32. + +**hfront_porch** (RW): + Number of PCLK pulses between the end of the last valid pixel in the video + line (marked by DE=1) and assertion of the HSYNC signal. The default value + is 32. + +**vback_porch** (RW): + Number of video lines between deassertion of the VSYNC signal and the video + line with the first valid pixel (marked by DE=1). The default value is 2. + +**vfront_porch** (RW): + Number of video lines between the end of the last valid pixel line (marked + by DE=1) and assertion of the VSYNC signal. The default value is 2. + + +FPDL3 specific input parameters +=============================== + +**fpdl3_input_width** (RW): + Number of deserializer input lines. + + | 0 - auto (default) + | 1 - single + | 2 - dual + +FPDL3 specific output parameters +================================ + +**fpdl3_output_width** (RW): + Number of serializer output lines. + + | 0 - auto (default) + | 1 - single + | 2 - dual + +GMSL specific input parameters +============================== + +**gmsl_mode** (RW): + GMSL speed mode. + + | 0 - 12Gb/s (default) + | 1 - 6Gb/s + | 2 - 3Gb/s + | 3 - 1.5Gb/s + +**gmsl_stream_id** (RW): + The GMSL multi-stream contains up to four video streams. This parameter + selects which stream is captured by the video input. The value is the + zero-based index of the stream. The default stream id is 0. + + *Note: This parameter can not be changed while the input v4l2 device is + open.* + +**gmsl_fec** (RW): + GMSL Forward Error Correction (FEC). + + | 0 - disabled + | 1 - enabled (default) + + +==================== +mgb4 mtd partitions +==================== + +The mgb4 driver creates a MTD device with two partitions: + - mgb4-fw.X - FPGA firmware. 
+ - mgb4-data.X - Factory settings, e.g. card serial number. + +The *mgb4-fw* partition is writable and is used for FW updates, *mgb4-data* is +read-only. The *X* attached to the partition name represents the card number. +Depending on the CONFIG_MTD_PARTITIONED_MASTER kernel configuration, you may +also have a third partition named *mgb4-flash* available in the system. This +partition represents the whole, unpartitioned, card's FLASH memory and one should +not fiddle with it... + +==================== +mgb4 iio (triggers) +==================== + +The mgb4 driver creates an Industrial I/O (IIO) device that provides trigger and +signal level status capability. The following scan elements are available: + +**activity**: + The trigger levels and pending status. + + | bit 1 - trigger 1 pending + | bit 2 - trigger 2 pending + | bit 5 - trigger 1 level + | bit 6 - trigger 2 level + +**timestamp**: + The trigger event timestamp. + +The iio device can operate either in "raw" mode where you can fetch the signal +levels (activity bits 5 and 6) using sysfs access or in triggered buffer mode. +In the triggered buffer mode you can follow the signal level changes (activity +bits 1 and 2) using the iio device in /dev. If you enable the timestamps, you +will also get the exact trigger event time that can be matched to a video frame +(every mgb4 video frame has a timestamp with the same clock source). 
+ +*Note: although the activity sample always contains all the status bits, it makes +no sense to get the pending bits in raw mode or the level bits in the triggered +buffer mode - the values do not represent valid data in such case.* diff --git a/Documentation/admin-guide/media/pci-cardlist.rst b/Documentation/admin-guide/media/pci-cardlist.rst index 42528795d4..7d8e3c8987 100644 --- a/Documentation/admin-guide/media/pci-cardlist.rst +++ b/Documentation/admin-guide/media/pci-cardlist.rst @@ -77,6 +77,7 @@ ipu3-cio2 Intel ipu3-cio2 driver ivtv Conexant cx23416/cx23415 MPEG encoder/decoder ivtvfb Conexant cx23415 framebuffer mantis MANTIS based cards +mgb4 Digiteq Automotive MGB4 frame grabber mxb Siemens-Nixdorf 'Multimedia eXtension Board' netup-unidvb NetUP Universal DVB card ngene Micronas nGene diff --git a/Documentation/admin-guide/media/v4l-drivers.rst b/Documentation/admin-guide/media/v4l-drivers.rst index 1c41f87c39..61283d67ce 100644 --- a/Documentation/admin-guide/media/v4l-drivers.rst +++ b/Documentation/admin-guide/media/v4l-drivers.rst @@ -17,6 +17,7 @@ Video4Linux (V4L) driver-specific documentation imx7 ipu3 ivtv + mgb4 omap3isp omap4_camera philips diff --git a/Documentation/admin-guide/media/visl.rst b/Documentation/admin-guide/media/visl.rst index 7d2dc78341..4328c6c72d 100644 --- a/Documentation/admin-guide/media/visl.rst +++ b/Documentation/admin-guide/media/visl.rst @@ -78,7 +78,7 @@ The trace events are defined on a per-codec basis, e.g.: .. code-block:: bash - $ ls /sys/kernel/debug/tracing/events/ | grep visl + $ ls /sys/kernel/tracing/events/ | grep visl visl_fwht_controls visl_h264_controls visl_hevc_controls @@ -90,13 +90,13 @@ For example, in order to dump HEVC SPS data: .. code-block:: bash - $ echo 1 > /sys/kernel/debug/tracing/events/visl_hevc_controls/v4l2_ctrl_hevc_sps/enable + $ echo 1 > /sys/kernel/tracing/events/visl_hevc_controls/v4l2_ctrl_hevc_sps/enable The SPS data will be dumped to the trace buffer, i.e.: .. 
code-block:: bash - $ cat /sys/kernel/debug/tracing/trace + $ cat /sys/kernel/tracing/trace video_parameter_set_id 0 seq_parameter_set_id 0 pic_width_in_luma_samples 1920 diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 8da1b72818..da94feb97e 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -20,18 +20,18 @@ DAMON provides below interfaces for different users. you can write and use your personalized DAMON sysfs wrapper programs that reads/writes the sysfs files instead of you. The `DAMON user space tool `_ is one example of such programs. -- *debugfs interface. (DEPRECATED!)* - :ref:`This ` is almost identical to :ref:`sysfs interface - `. This is deprecated, so users should move to the - :ref:`sysfs interface `. If you depend on this and cannot - move, please report your usecase to damon@lists.linux.dev and - linux-mm@kvack.org. - *Kernel Space Programming Interface.* :doc:`This ` is for kernel space programmers. Using this, users can utilize every feature of DAMON most flexibly and efficiently by writing kernel space DAMON application programs for you. You can even extend DAMON for various address spaces. For detail, please refer to the interface :doc:`document `. +- *debugfs interface. (DEPRECATED!)* + :ref:`This ` is almost identical to :ref:`sysfs interface + `. This is deprecated, so users should move to the + :ref:`sysfs interface `. If you depend on this and cannot + move, please report your usecase to damon@lists.linux.dev and + linux-mm@kvack.org. .. _sysfs_interface: @@ -76,7 +76,7 @@ comma (","). :: │ │ │ │ │ │ │ │ ... │ │ │ │ │ │ ... │ │ │ │ │ schemes/nr_schemes - │ │ │ │ │ │ 0/action + │ │ │ │ │ │ 0/action,apply_interval_us │ │ │ │ │ │ │ access_pattern/ │ │ │ │ │ │ │ │ sz/min,max │ │ │ │ │ │ │ │ nr_accesses/min,max @@ -105,14 +105,12 @@ having the root permission could use this directory. 
kdamonds/ --------- -The monitoring-related information including request specifications and results -are called DAMON context. DAMON executes each context with a kernel thread -called kdamond, and multiple kdamonds could run in parallel. - Under the ``admin`` directory, one directory, ``kdamonds``, which has files for -controlling the kdamonds exist. In the beginning, this directory has only one -file, ``nr_kdamonds``. Writing a number (``N``) to the file creates the number -of child directories named ``0`` to ``N-1``. Each directory represents each +controlling the kdamonds (refer to +:ref:`design ` for more +details) exists. In the beginning, this directory has only one file, +``nr_kdamonds``. Writing a number (``N``) to the file creates the number of +child directories named ``0`` to ``N-1``. Each directory represents each kdamond. kdamonds// @@ -150,9 +148,10 @@ kdamonds//contexts/ In the beginning, this directory has only one file, ``nr_contexts``. Writing a number (``N``) to the file creates the number of child directories named as -``0`` to ``N-1``. Each directory represents each monitoring context. At the -moment, only one context per kdamond is supported, so only ``0`` or ``1`` can -be written to the file. +``0`` to ``N-1``. Each directory represents each monitoring context (refer to +:ref:`design ` for more +details). At the moment, only one context per kdamond is supported, so only +``0`` or ``1`` can be written to the file. .. _sysfs_contexts: @@ -270,8 +269,8 @@ schemes// ------------ In each scheme directory, five directories (``access_pattern``, ``quotas``, -``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and one file -(``action``) exist. +``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and two files +(``action`` and ``apply_interval_us``) exist. The ``action`` file is for setting and getting the scheme's :ref:`action `.
The keywords that can be written to and read @@ -297,6 +296,9 @@ Note that support of each action depends on the running DAMON operations set - ``stat``: Do nothing but count the statistics. Supported by all operations sets. +The ``apply_interval_us`` file is for setting and getting the scheme's +:ref:`apply_interval ` in microseconds. + schemes//access_pattern/ --------------------------- @@ -392,7 +394,7 @@ pages of all memory cgroups except ``/having_care_already``.:: echo N > 1/matching Note that ``anon`` and ``memcg`` filters are currently supported only when -``paddr`` `implementation ` is being used. +``paddr`` :ref:`implementation ` is being used. Also, memory regions that are filtered out by ``addr`` or ``target`` filters are not counted as the scheme has tried to those, while regions that filtered @@ -430,9 +432,9 @@ that reading it returns the total size of the scheme tried regions, and creates directories named integer starting from ``0`` under this directory. Each directory contains files exposing detailed information about each of the memory region that the corresponding scheme's ``action`` has tried to be applied under -this directory, during next :ref:`aggregation interval -`. The information includes address range, -``nr_accesses``, and ``age`` of the region. +this directory, during next :ref:`apply interval ` of the +corresponding scheme. The information includes address range, ``nr_accesses``, +and ``age`` of the region. Writing ``update_schemes_tried_bytes`` to the relevant ``kdamonds//state`` file will only update the ``total_bytes`` file, and will not create the @@ -495,6 +497,62 @@ Please note that it's highly recommended to use user space tools like `damo `_ rather than manually reading and writing the files as above. Above is only for an example. +.. _tracepoint: + +Tracepoints for Monitoring Results +================================== + +Users can get the monitoring results via the :ref:`tried_regions +`. 
The interface is useful for getting a +snapshot, but it could be inefficient for fully recording all the monitoring +results. For the purpose, two trace points, namely ``damon:damon_aggregated`` +and ``damon:damos_before_apply``, are provided. ``damon:damon_aggregated`` +provides the whole monitoring results, while ``damon:damos_before_apply`` +provides the monitoring results for regions that each DAMON-based Operation +Scheme (:ref:`DAMOS `) is going to be applied to. Hence, +``damon:damos_before_apply`` is more useful for recording internal behavior of +DAMOS, or DAMOS target access +:ref:`pattern ` based query-like efficient +monitoring results recording. + +While the monitoring is turned on, you could record the tracepoint events and +show results using tracepoint supporting tools like ``perf``. For example:: + + # echo on > monitor_on + # perf record -e damon:damon_aggregated & + # sleep 5 + # kill 9 $(pidof perf) + # echo off > monitor_on + # perf script + kdamond.0 46568 [027] 79357.842179: damon:damon_aggregated: target_id=0 nr_regions=11 122509119488-135708762112: 0 864 + [...] + +Each line of the perf script output represents each monitoring region. The +first five fields are the same as in other tracepoint outputs. The sixth field +(``target_id=X``) shows the id of the monitoring target of the region. The +seventh field (``nr_regions=X``) shows the total number of monitoring regions +for the target. The eighth field (``X-Y:``) shows the start (``X``) and end +(``Y``) addresses of the region in bytes. The ninth field (``X``) shows the +``nr_accesses`` of the region (refer to +:ref:`design ` for more details of the +counter). Finally the tenth field (``X``) shows the ``age`` of the region +(refer to :ref:`design ` for more details of the +counter).
+ +If the event was ``damon:damos_before_apply``, the ``perf script`` output would +be somewhat like below:: + + kdamond.0 47293 [000] 80801.060214: damon:damos_before_apply: ctx_idx=0 scheme_idx=0 target_idx=0 nr_regions=11 121932607488-135128711168: 0 136 + [...] + +Each line of the output represents each monitoring region that each DAMON-based +Operation Scheme was about to be applied at the traced time. The first five +fields are as usual. It shows the index of the DAMON context (``ctx_idx=X``) +of the scheme in the list of the contexts of the context's kdamond, the index +of the scheme (``scheme_idx=X``) in the list of the schemes of the context, in +addition to the output of ``damon_aggregated`` tracepoint. + + .. _debugfs_interface: debugfs Interface (DEPRECATED!) @@ -790,23 +848,3 @@ directory by putting the name of the context to the ``rm_contexts`` file. :: Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the root directory only. - - -.. _tracepoint: - -Tracepoint for Monitoring Results -================================= - -Users can get the monitoring results via the :ref:`tried_regions -` or a tracepoint, ``damon:damon_aggregated``. -While the tried regions directory is useful for getting a snapshot, the -tracepoint is useful for getting a full record of the results. While the -monitoring is turned on, you could record the tracepoint events and show -results using tracepoint supporting tools like ``perf``. For example:: - - # echo on > monitor_on - # perf record -e damon:damon_aggregated & - # sleep 5 - # kill 9 $(pidof perf) - # echo off > monitor_on - # perf script diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst index 776f244bda..e59231ac6b 100644 --- a/Documentation/admin-guide/mm/ksm.rst +++ b/Documentation/admin-guide/mm/ksm.rst @@ -155,6 +155,15 @@ stable_node_chains_prune_millisecs scan. It's a noop if not a single KSM page hit the ``max_page_sharing`` yet.
+smart_scan + Historically KSM checked every candidate page for each scan. It did + not take into account historic information. When smart scan is + enabled, pages that have previously not been de-duplicated get + skipped. How often these pages are skipped depends on how often + de-duplication has already been tried and failed. By default this + optimization is enabled. The ``pages_skipped`` metric shows how + effective the setting is. + The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``: general_profit @@ -169,6 +178,8 @@ pages_unshared how many pages unique but repeatedly checked for merging pages_volatile how many pages changing too fast to be placed in a tree +pages_skipped + how many pages did the "smart" page scanning algorithm skip full_scans how many times all mergeable areas have been scanned stable_node_chains diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index cfe034cf1e..098f14d83e 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -33,7 +33,7 @@ used to expose persistent memory, other performance-differentiated memory and reserved memory regions as ordinary system RAM to Linux. Linux only supports memory hot(un)plug on selected 64 bit architectures, such as -x86_64, arm64, ppc64, s390x and ia64. +x86_64, arm64, ppc64 and s390x. Memory Hot(Un)Plug Granularity ------------------------------ @@ -75,7 +75,7 @@ Memory hotunplug consists of two phases: (1) Offlining memory blocks (2) Removing the memory from Linux -In the fist phase, memory is "hidden" from the page allocator again, for +In the first phase, memory is "hidden" from the page allocator again, for example, by migrating busy memory to other memory locations and removing all relevant free pages from the page allocator After this phase, the memory is no longer visible in memory statistics of the system. 
@@ -250,15 +250,15 @@ Observing the State of Memory Blocks The state (online/offline/going-offline) of a memory block can be observed either via:: - % cat /sys/device/system/memory/memoryXXX/state + % cat /sys/devices/system/memory/memoryXXX/state Or alternatively (1/0) via:: - % cat /sys/device/system/memory/memoryXXX/online + % cat /sys/devices/system/memory/memoryXXX/online For an online memory block, the managing zone can be observed via:: - % cat /sys/device/system/memory/memoryXXX/valid_zones + % cat /sys/devices/system/memory/memoryXXX/valid_zones Configuring Memory Hot(Un)Plug ============================== @@ -326,7 +326,7 @@ however, a memory block might span memory holes. A memory block spanning memory holes cannot be offlined. For example, assume 1 GiB memory block size. A device for a memory starting at -0x100000000 is ``/sys/device/system/memory/memory4``:: +0x100000000 is ``/sys/devices/system/memory/memory4``:: (0x100000000 / 1Gib = 4) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index c8f380271c..fe17cf2104 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -227,3 +227,92 @@ Before Linux 3.11 pagemap bits 55-60 were used for "page-shift" (which is always 12 at most architectures). Since Linux 3.11 their meaning changes after first clear of soft-dirty bits. Since Linux 4.2 they are used for flags unconditionally. + +Pagemap Scan IOCTL +================== + +The ``PAGEMAP_SCAN`` IOCTL on the pagemap file can be used to get or optionally +clear the info about page table entries. The following operations are supported +in this IOCTL: + +- Scan the address range and get the memory ranges matching the provided criteria. + This is performed when the output buffer is specified. +- Write-protect the pages. The ``PM_SCAN_WP_MATCHING`` is used to write-protect + the pages of interest. 
The ``PM_SCAN_CHECK_WPASYNC`` aborts the operation if + non-Async Write Protected pages are found. The ``PM_SCAN_WP_MATCHING`` can be + used with or without ``PM_SCAN_CHECK_WPASYNC``. +- Both of those operations can be combined into one atomic operation where we can + get and write protect the pages as well. + +Following flags about pages are currently supported: + +- ``PAGE_IS_WPALLOWED`` - Page has async-write-protection enabled +- ``PAGE_IS_WRITTEN`` - Page has been written to from the time it was write protected +- ``PAGE_IS_FILE`` - Page is file backed +- ``PAGE_IS_PRESENT`` - Page is present in the memory +- ``PAGE_IS_SWAPPED`` - Page is swapped out +- ``PAGE_IS_PFNZERO`` - Page has zero PFN +- ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed + +The ``struct pm_scan_arg`` is used as the argument of the IOCTL. + + 1. The size of the ``struct pm_scan_arg`` must be specified in the ``size`` + field. This field will be helpful in recognizing the structure if extensions + are done later. + 2. The flags can be specified in the ``flags`` field. The ``PM_SCAN_WP_MATCHING`` + and ``PM_SCAN_CHECK_WPASYNC`` are the only added flags at this time. The get + operation is optionally performed depending upon if the output buffer is + provided or not. + 3. The range is specified through ``start`` and ``end``. + 4. The walk can abort before visiting the complete range, e.g. when the user + buffer gets full. The walk ending address is specified in ``end_walk``. + 5. The output buffer of ``struct page_region`` array and size is specified in + ``vec`` and ``vec_len``. + 6. The optional maximum requested pages are specified in the ``max_pages``. + 7. The masks are specified in ``category_mask``, ``category_anyof_mask``, + ``category_inverted`` and ``return_mask``. + +Find pages which have been written and WP them as well:: + + struct pm_scan_arg arg = { + .size = sizeof(arg), + .flags = PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + ..
+ .category_mask = PAGE_IS_WRITTEN, + .return_mask = PAGE_IS_WRITTEN, + }; + +Find pages which have been written, are file backed, not swapped and either +present or huge:: + + struct pm_scan_arg arg = { + .size = sizeof(arg), + .flags = 0, + .. + .category_mask = PAGE_IS_WRITTEN | PAGE_IS_SWAPPED, + .category_inverted = PAGE_IS_SWAPPED, + .category_anyof_mask = PAGE_IS_PRESENT | PAGE_IS_HUGE, + .return_mask = PAGE_IS_WRITTEN | PAGE_IS_SWAPPED | + PAGE_IS_PRESENT | PAGE_IS_HUGE, + }; + +The ``PAGE_IS_WRITTEN`` flag can be considered as a better-performing alternative +of soft-dirty flag. It doesn't get affected by VMA merging of the kernel and hence +the user can find the true soft-dirty pages in case of normal pages. (There may +still be extra dirty pages reported for THP or Hugetlb pages.) + +"PAGE_IS_WRITTEN" category is used with uffd write protect-enabled ranges to +implement memory dirty tracking in userspace: + + 1. The userfaultfd file descriptor is created with ``userfaultfd`` syscall. + 2. The ``UFFD_FEATURE_WP_UNPOPULATED`` and ``UFFD_FEATURE_WP_ASYNC`` features + are set by ``UFFDIO_API`` IOCTL. + 3. The memory range is registered with ``UFFDIO_REGISTER_MODE_WP`` mode + through ``UFFDIO_REGISTER`` IOCTL. + 4. Then any part of the registered memory or the whole memory region must + be write protected using ``PAGEMAP_SCAN`` IOCTL with flag ``PM_SCAN_WP_MATCHING`` + or the ``UFFDIO_WRITEPROTECT`` IOCTL can be used. Both of these perform the + same operation. The former is better in terms of performance. + 5. Now the ``PAGEMAP_SCAN`` IOCTL can be used to either just find pages which + have been written to since they were last marked and/or optionally write protect + the pages as well. 
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst index 4349a8c2b9..203e26da5f 100644 --- a/Documentation/admin-guide/mm/userfaultfd.rst +++ b/Documentation/admin-guide/mm/userfaultfd.rst @@ -244,6 +244,41 @@ write-protected (so future writes will also result in a WP fault). These ioctls support a mode flag (``UFFDIO_COPY_MODE_WP`` or ``UFFDIO_CONTINUE_MODE_WP`` respectively) to configure the mapping this way. +If the userfaultfd context has ``UFFD_FEATURE_WP_ASYNC`` feature bit set, +any vma registered with write-protection will work in async mode rather +than the default sync mode. + +In async mode, there will be no message generated when a write operation +happens, meanwhile the write-protection will be resolved automatically by +the kernel. It can be seen as a more accurate version of soft-dirty +tracking and it can be different in a few ways: + + - The dirty result will not be affected by vma changes (e.g. vma + merging) because the dirty is only tracked by the pte. + + - It supports range operations by default, so one can enable tracking on + any range of memory as long as page aligned. + + - Dirty information will not get lost if the pte was zapped due to + various reasons (e.g. during split of a shmem transparent huge page). + + - Due to a reverted meaning of soft-dirty (page clean when uffd-wp bit + set; dirty when uffd-wp bit cleared), it has different semantics on + some of the memory operations. For example: ``MADV_DONTNEED`` on + anonymous (or ``MADV_REMOVE`` on a file mapping) will be treated as + dirtying of memory by dropping uffd-wp bit during the procedure. + +The user app can collect the "written/dirty" status by looking up the +uffd-wp bit for the pages being interested in /proc/pagemap. + +The page will not be under track of uffd-wp async mode until the page is +explicitly write-protected by ``ioctl(UFFDIO_WRITEPROTECT)`` with the mode +flag ``UFFDIO_WRITEPROTECT_MODE_WP`` set. 
Trying to resolve a page fault +that was tracked by async mode userfaultfd-wp is invalid. + +When userfaultfd-wp async mode is used alone, it can be applied to all +kinds of memory. + Memory Poisioning Emulation --------------------------- diff --git a/Documentation/admin-guide/module-signing.rst b/Documentation/admin-guide/module-signing.rst index 2898b27032..a8667a7774 100644 --- a/Documentation/admin-guide/module-signing.rst +++ b/Documentation/admin-guide/module-signing.rst @@ -28,10 +28,10 @@ trusted userspace bits. This facility uses X.509 ITU-T standard certificates to encode the public keys involved. The signatures are not themselves encoded in any industrial standard -type. The facility currently only supports the RSA public key encryption -standard (though it is pluggable and permits others to be used). The possible -hash algorithms that can be used are SHA-1, SHA-224, SHA-256, SHA-384, and -SHA-512 (the algorithm is selected by data in the signature). +type. The built-in facility currently only supports the RSA & NIST P-384 ECDSA +public key signing standard (though it is pluggable and permits others to be +used). The possible hash algorithms that can be used are SHA-2 and SHA-3 of +sizes 256, 384, and 512 (the algorithm is selected by data in the signature). 
========================== @@ -81,11 +81,12 @@ This has a number of options available: sign the modules with: =============================== ========================================== - ``CONFIG_MODULE_SIG_SHA1`` :menuselection:`Sign modules with SHA-1` - ``CONFIG_MODULE_SIG_SHA224`` :menuselection:`Sign modules with SHA-224` ``CONFIG_MODULE_SIG_SHA256`` :menuselection:`Sign modules with SHA-256` ``CONFIG_MODULE_SIG_SHA384`` :menuselection:`Sign modules with SHA-384` ``CONFIG_MODULE_SIG_SHA512`` :menuselection:`Sign modules with SHA-512` + ``CONFIG_MODULE_SIG_SHA3_256`` :menuselection:`Sign modules with SHA3-256` + ``CONFIG_MODULE_SIG_SHA3_384`` :menuselection:`Sign modules with SHA3-384` + ``CONFIG_MODULE_SIG_SHA3_512`` :menuselection:`Sign modules with SHA3-512` =============================== ========================================== The algorithm selected here will also be built into the kernel (rather @@ -145,6 +146,10 @@ into vmlinux) using parameters in the:: file (which is also generated if it does not already exist). +One can select between RSA (``MODULE_SIG_KEY_TYPE_RSA``) and ECDSA +(``MODULE_SIG_KEY_TYPE_ECDSA``) to generate either RSA 4k or NIST +P-384 keypair. + It is strongly recommended that you provide your own x509.genkey file. Most notably, in the x509.genkey file, the req_distinguished_name section diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst new file mode 100644 index 0000000000..94f93f5aee --- /dev/null +++ b/Documentation/admin-guide/perf/ampere_cspmu.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================ +Ampere SoC Performance Monitoring Unit (PMU) +============================================ + +Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture. +Therefore, the driver is implemented as a submodule of arm_cspmu driver. At the +first phase it's used for counting MCU events on AmpereOne. 
+ + +MCU PMU events +-------------- + +The PMU driver supports setting filters for "rank", "bank", and "threshold". +Note, that the filters are per PMU instance rather than per event. + + +Example for perf tool use:: + + / # perf list ampere + + ampere_mcu_pmu_0/act_sent/ [Kernel PMU event] + <...> + ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event] + <...> + + / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \ + sleep 1 diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index f60be04e4e..a2e6f2c811 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -22,3 +22,4 @@ Performance monitor support nvidia-pmu meson-ddr-pmu cxl + ampere_cspmu diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst index b799a43da6..39bd6ecce7 100644 --- a/Documentation/admin-guide/pm/intel_idle.rst +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -170,7 +170,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ``MWAIT`` instruction is not allowed to be used, so the initialization of ``intel_idle`` will fail. -Apart from that there are four module parameters recognized by ``intel_idle`` +Apart from that there are five module parameters recognized by ``intel_idle`` itself that can be set via the kernel command line (they cannot be updated via sysfs, so that is the only way to change their values). @@ -216,6 +216,21 @@ are ignored). The idle states disabled this way can be enabled (on a per-CPU basis) from user space via ``sysfs``. +The ``ibrs_off`` module parameter is a boolean flag (defaults to +false). If set, it is used to control if IBRS (Indirect Branch Restricted +Speculation) should be turned off when the CPU enters an idle state. +This flag does not affect CPUs that use Enhanced IBRS which can remain +on with little performance impact. 
+ +For some CPUs, IBRS will be selected as mitigation for Spectre v2 and Retbleed +security vulnerabilities by default. Leaving the IBRS mode on while idling may +have a performance impact on its sibling CPU. The IBRS mode will be turned off +by default when the CPU enters into a deep idle state, but not in some +shallower ones. Setting the ``ibrs_off`` module parameter will force the IBRS +mode to off when the CPU is in any one of the available idle states. This may +help performance of a sibling CPU at the expense of a slightly higher wakeup +latency for the idle CPU. + .. _intel-idle-core-and-package-idle-states: diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst index 2d22ead952..1bb2a1c292 100644 --- a/Documentation/admin-guide/pstore-blk.rst +++ b/Documentation/admin-guide/pstore-blk.rst @@ -76,7 +76,7 @@ kmsg_size ~~~~~~~~~ The chunk size in KB for oops/panic front-end. It **MUST** be a multiple of 4. -It's optional if you do not care oops/panic log. +It's optional if you do not care about the oops/panic log. There are multiple chunks for oops/panic front-end depending on the remaining space except other pstore front-ends. @@ -88,7 +88,7 @@ pmsg_size ~~~~~~~~~ The chunk size in KB for pmsg front-end. It **MUST** be a multiple of 4. -It's optional if you do not care pmsg log. +It's optional if you do not care about the pmsg log. Unlike oops/panic front-end, there is only one chunk for pmsg front-end. @@ -100,7 +100,7 @@ console_size ~~~~~~~~~~~~ The chunk size in KB for console front-end. It **MUST** be a multiple of 4. -It's optional if you do not care console log. +It's optional if you do not care about the console log. Similar to pmsg front-end, there is only one chunk for console front-end. @@ -111,7 +111,7 @@ ftrace_size ~~~~~~~~~~~ The chunk size in KB for ftrace front-end. It **MUST** be a multiple of 4. -It's optional if you do not care console log. +It's optional if you do not care about the ftrace log. 
Similar to oops front-end, there are multiple chunks for ftrace front-end depending on the count of cpu processors. Each chunk size is equal to diff --git a/Documentation/admin-guide/spkguide.txt b/Documentation/admin-guide/spkguide.txt index 74ea7f3919..0d5965138f 100644 --- a/Documentation/admin-guide/spkguide.txt +++ b/Documentation/admin-guide/spkguide.txt @@ -7,7 +7,7 @@ Last modified on Mon Sep 27 14:26:31 2010 Document version 1.3 Copyright (c) 2005 Gene Collins -Copyright (c) 2008 Samuel Thibault +Copyright (c) 2008, 2023 Samuel Thibault Copyright (c) 2009, 2010 the Speakup Team Permission is granted to copy, distribute and/or modify this document @@ -83,8 +83,7 @@ spkout -- Speak Out txprt -- Transport dummy -- Plain text terminal -Note: Speakup does * NOT * support usb connections! Speakup also does * -NOT * support the internal Tripletalk! +Note: Speakup does * NOT * support the internal Tripletalk! Speakup does support two other synthesizers, but because they work in conjunction with other software, they must be loaded as modules after @@ -94,6 +93,12 @@ These are as follows: decpc -- DecTalk PC (not available at boot up) soft -- One of several software synthesizers (not available at boot up) +By default speakup looks for the synthesizer on the ttyS0 serial port. This can +be changed with the device parameter of the modules, for instance for +DoubleTalk LT: + +speakup_ltlk.dev=ttyUSB0 + See the sections on loading modules and software synthesizers later in this manual for further details. It should be noted here that the speakup.synth boot parameter will have no effect if Speakup has been diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index a321b84ecc..47499a1742 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -42,16 +42,16 @@ pre-allocation or re-sizing of any kernel data structures. 
dentry-state ------------ -This file shows the values in ``struct dentry_stat``, as defined in -``linux/include/linux/dcache.h``:: +This file shows the values in ``struct dentry_stat_t``, as defined in +``fs/dcache.c``:: struct dentry_stat_t dentry_stat { - int nr_dentry; - int nr_unused; - int age_limit; /* age in seconds */ - int want_pages; /* pages requested by system */ - int nr_negative; /* # of unused negative dentries */ - int dummy; /* Reserved for future use */ + long nr_dentry; + long nr_unused; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long nr_negative; /* # of unused negative dentries */ + long dummy; /* Reserved for future use */ }; Dentries are dynamically allocated and deallocated. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index cf33de56da..6584a1f9bf 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -436,7 +436,7 @@ ignore-unaligned-usertrap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``; -currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all +currently, ``arc`` and ``loongarch``), controls whether all unaligned traps are logged. = ============================================================= @@ -445,10 +445,7 @@ unaligned traps are logged. setting. = ============================================================= -See also `unaligned-trap`_ and `unaligned-dump-stack`_. On ``ia64``, -this allows system administrators to override the -``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. - +See also `unaligned-trap`_. io_uring_disabled ================= @@ -1182,7 +1179,8 @@ automatically on platforms where it can run (that is, platforms with asymmetric CPU topologies and having an Energy Model available). 
If your platform happens to meet the requirements for EAS but you do not want to use it, change -this value to 0. +this value to 0. On Non-EAS platforms, write operation fails and +read doesn't return anything. task_delayacct =============== @@ -1538,22 +1536,6 @@ See Documentation/admin-guide/kernel-parameters.rst and Documentation/trace/boottime-trace.rst. -.. _unaligned-dump-stack: - -unaligned-dump-stack (ia64) -=========================== - -When logging unaligned accesses, controls whether the stack is -dumped. - -= =================================================== -0 Do not dump the stack. This is the default setting. -1 Dump the stack. -= =================================================== - -See also `ignore-unaligned-usertrap`_. - - unaligned-trap ============== diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4877563241..c7525942f1 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - s390x - riscv64 - riscv32 + - loongarch64 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 45ba1f4dc0..c59889de12 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -742,8 +742,8 @@ overcommit_memory This value contains a flag that enables memory overcommitment. -When this flag is 0, the kernel attempts to estimate the amount -of free memory left when userspace requests more memory. +When this flag is 0, the kernel compares the userspace memory request +size against total memory plus swap and rejects obvious overcommits. When this flag is 1, the kernel pretends there is always enough memory until it actually runs out. 
diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst index de6d8a4790..44f9bd7853 100644 --- a/Documentation/arch/arm64/cpu-feature-registers.rst +++ b/Documentation/arch/arm64/cpu-feature-registers.rst @@ -268,6 +268,8 @@ infrastructure: +------------------------------+---------+---------+ | SHA3 | [35-32] | y | +------------------------------+---------+---------+ + | B16B16 | [27-24] | y | + +------------------------------+---------+---------+ | BF16 | [23-20] | y | +------------------------------+---------+---------+ | BitPerm | [19-16] | y | diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 76ff9d7398..ced7b335e2 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -174,7 +174,7 @@ HWCAP2_DCPODP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. HWCAP2_SVE2 - Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. + Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001. HWCAP2_SVEAES Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001. @@ -222,7 +222,7 @@ HWCAP2_RNG Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001. HWCAP2_BTI - Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. + Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001. HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described @@ -232,7 +232,7 @@ HWCAP2_ECV Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. HWCAP2_AFP - Functionality implied by ID_AA64MFR1_EL1.AFP == 0b0001. + Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001. HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. @@ -308,6 +308,15 @@ HWCAP2_MOPS HWCAP2_HBC Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001. +HWCAP2_SVE_B16B16 + Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001. + +HWCAP2_LRCPC3 + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011. 
+ +HWCAP2_LSE128 + Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 7acd64c61f..29fd5213ee 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -235,3 +235,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ASR | ASR8601 | #8601001 | N/A | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/arch/ia64/aliasing.rst b/Documentation/arch/ia64/aliasing.rst deleted file mode 100644 index 36a1e1d484..0000000000 --- a/Documentation/arch/ia64/aliasing.rst +++ /dev/null @@ -1,246 +0,0 @@ -================================== -Memory Attribute Aliasing on IA-64 -================================== - -Bjorn Helgaas - -May 4, 2006 - - -Memory Attributes -================= - - Itanium supports several attributes for virtual memory references. - The attribute is part of the virtual translation, i.e., it is - contained in the TLB entry. The ones of most interest to the Linux - kernel are: - - == ====================== - WB Write-back (cacheable) - UC Uncacheable - WC Write-coalescing - == ====================== - - System memory typically uses the WB attribute. 
The UC attribute is - used for memory-mapped I/O devices. The WC attribute is uncacheable - like UC is, but writes may be delayed and combined to increase - performance for things like frame buffers. - - The Itanium architecture requires that we avoid accessing the same - page with both a cacheable mapping and an uncacheable mapping[1]. - - The design of the chipset determines which attributes are supported - on which regions of the address space. For example, some chipsets - support either WB or UC access to main memory, while others support - only WB access. - -Memory Map -========== - - Platform firmware describes the physical memory map and the - supported attributes for each region. At boot-time, the kernel uses - the EFI GetMemoryMap() interface. ACPI can also describe memory - devices and the attributes they support, but Linux/ia64 currently - doesn't use this information. - - The kernel uses the efi_memmap table returned from GetMemoryMap() to - learn the attributes supported by each region of physical address - space. Unfortunately, this table does not completely describe the - address space because some machines omit some or all of the MMIO - regions from the map. - - The kernel maintains another table, kern_memmap, which describes the - memory Linux is actually using and the attribute for each region. - This contains only system memory; it does not contain MMIO space. - - The kern_memmap table typically contains only a subset of the system - memory described by the efi_memmap. Linux/ia64 can't use all memory - in the system because of constraints imposed by the identity mapping - scheme. - - The efi_memmap table is preserved unmodified because the original - boot-time information is required for kexec. - -Kernel Identity Mappings -======================== - - Linux/ia64 identity mappings are done with large pages, currently - either 16MB or 64MB, referred to as "granules." 
Cacheable mappings - are speculative[2], so the processor can read any location in the - page at any time, independent of the programmer's intentions. This - means that to avoid attribute aliasing, Linux can create a cacheable - identity mapping only when the entire granule supports cacheable - access. - - Therefore, kern_memmap contains only full granule-sized regions that - can referenced safely by an identity mapping. - - Uncacheable mappings are not speculative, so the processor will - generate UC accesses only to locations explicitly referenced by - software. This allows UC identity mappings to cover granules that - are only partially populated, or populated with a combination of UC - and WB regions. - -User Mappings -============= - - User mappings are typically done with 16K or 64K pages. The smaller - page size allows more flexibility because only 16K or 64K has to be - homogeneous with respect to memory attributes. - -Potential Attribute Aliasing Cases -================================== - - There are several ways the kernel creates new mappings: - -mmap of /dev/mem ----------------- - - This uses remap_pfn_range(), which creates user mappings. These - mappings may be either WB or UC. If the region being mapped - happens to be in kern_memmap, meaning that it may also be mapped - by a kernel identity mapping, the user mapping must use the same - attribute as the kernel mapping. - - If the region is not in kern_memmap, the user mapping should use - an attribute reported as being supported in the EFI memory map. - - Since the EFI memory map does not describe MMIO on some - machines, this should use an uncacheable mapping as a fallback. - -mmap of /sys/class/pci_bus/.../legacy_mem ------------------------------------------ - - This is very similar to mmap of /dev/mem, except that legacy_mem - only allows mmap of the one megabyte "legacy MMIO" area for a - specific PCI bus. 
Typically this is the first megabyte of - physical address space, but it may be different on machines with - several VGA devices. - - "X" uses this to access VGA frame buffers. Using legacy_mem - rather than /dev/mem allows multiple instances of X to talk to - different VGA cards. - - The /dev/mem mmap constraints apply. - -mmap of /proc/bus/pci/.../??.? ------------------------------- - - This is an MMIO mmap of PCI functions, which additionally may or - may not be requested as using the WC attribute. - - If WC is requested, and the region in kern_memmap is either WC - or UC, and the EFI memory map designates the region as WC, then - the WC mapping is allowed. - - Otherwise, the user mapping must use the same attribute as the - kernel mapping. - -read/write of /dev/mem ----------------------- - - This uses copy_from_user(), which implicitly uses a kernel - identity mapping. This is obviously safe for things in - kern_memmap. - - There may be corner cases of things that are not in kern_memmap, - but could be accessed this way. For example, registers in MMIO - space are not in kern_memmap, but could be accessed with a UC - mapping. This would not cause attribute aliasing. But - registers typically can be accessed only with four-byte or - eight-byte accesses, and the copy_from_user() path doesn't allow - any control over the access size, so this would be dangerous. - -ioremap() ---------- - - This returns a mapping for use inside the kernel. - - If the region is in kern_memmap, we should use the attribute - specified there. - - If the EFI memory map reports that the entire granule supports - WB, we should use that (granules that are partially reserved - or occupied by firmware do not appear in kern_memmap). - - If the granule contains non-WB memory, but we can cover the - region safely with kernel page table mappings, we can use - ioremap_page_range() as most other architectures do. - - Failing all of the above, we have to fall back to a UC mapping. 
- -Past Problem Cases -================== - -mmap of various MMIO regions from /dev/mem by "X" on Intel platforms --------------------------------------------------------------------- - - The EFI memory map may not report these MMIO regions. - - These must be allowed so that X will work. This means that - when the EFI memory map is incomplete, every /dev/mem mmap must - succeed. It may create either WB or UC user mappings, depending - on whether the region is in kern_memmap or the EFI memory map. - -mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled ----------------------------------------------------------------------- - - The EFI memory map reports the following attributes: - - =============== ======= ================== - 0x00000-0x9FFFF WB only - 0xA0000-0xBFFFF UC only (VGA frame buffer) - 0xC0000-0xFFFFF WB only - =============== ======= ================== - - This mmap is done with user pages, not kernel identity mappings, - so it is safe to use WB mappings. - - The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, - which uses a granule-sized UC mapping. This granule will cover some - WB-only memory, but since UC is non-speculative, the processor will - never generate an uncacheable reference to the WB-only areas unless - the driver explicitly touches them. - -mmap of 0x0-0xFFFFF legacy_mem by "X" -------------------------------------- - - If the EFI memory map reports that the entire range supports the - same attributes, we can allow the mmap (and we will prefer WB if - supported, as is the case with HP sx[12]000 machines with VGA - disabled). - - If EFI reports the range as partly WB and partly UC (as on sx[12]000 - machines with VGA enabled), we must fail the mmap because there's no - safe attribute to use. - - If EFI reports some of the range but not all (as on Intel firmware - that doesn't report the VGA frame buffer at all), we should fail the - mmap and force the user to map just the specific region of interest. 
- -mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled ------------------------------------------------------------------------- - - The EFI memory map reports the following attributes:: - - 0x00000-0xFFFFF WB only (no VGA MMIO hole) - - This is a special case of the previous case, and the mmap should - fail for the same reason as above. - -read of /sys/devices/.../rom ----------------------------- - - For VGA devices, this may cause an ioremap() of 0xC0000. This - used to be done with a UC mapping, because the VGA frame buffer - at 0xA0000 prevents use of a WB granule. The UC mapping causes - an MCA on HP sx[12]000 chipsets. - - We should use WB page table mappings to avoid covering the VGA - frame buffer. - -Notes -===== - - [1] SDM rev 2.2, vol 2, sec 4.4.1. - [2] SDM rev 2.2, vol 2, sec 4.4.6. diff --git a/Documentation/arch/ia64/efirtc.rst b/Documentation/arch/ia64/efirtc.rst deleted file mode 100644 index fd83284083..0000000000 --- a/Documentation/arch/ia64/efirtc.rst +++ /dev/null @@ -1,144 +0,0 @@ -========================== -EFI Real Time Clock driver -========================== - -S. Eranian - -March 2000 - -1. Introduction -=============== - -This document describes the efirtc.c driver has provided for -the IA-64 platform. - -The purpose of this driver is to supply an API for kernel and user applications -to get access to the Time Service offered by EFI version 0.92. - -EFI provides 4 calls one can make once the OS is booted: GetTime(), -SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this -driver. We describe those calls as well the design of the driver in the -following sections. - -2. Design Decisions -=================== - -The original ideas was to provide a very simple driver to get access to, -at first, the time of day service. This is required in order to access, in a -portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock -to initialize the system view of the time during boot. 
- -Because we wanted to minimize the impact on existing user-level apps using -the CMOS clock, we decided to expose an API that was very similar to the one -used today with the legacy RTC driver (driver/char/rtc.c). However, because -EFI provides a simpler services, not all ioctl() are available. Also -new ioctl()s have been introduced for things that EFI provides but not the -legacy. - -EFI uses a slightly different way of representing the time, noticeably -the reference date is different. Year is the using the full 4-digit format. -The Epoch is January 1st 1998. For backward compatibility reasons we don't -expose this new way of representing time. Instead we use something very -similar to the struct tm, i.e. struct rtc_time, as used by hwclock. -One of the reasons for doing it this way is to allow for EFI to still evolve -without necessarily impacting any of the user applications. The decoupling -enables flexibility and permits writing wrapper code is ncase things change. - -The driver exposes two interfaces, one via the device file and a set of -ioctl()s. The other is read-only via the /proc filesystem. - -As of today we don't offer a /proc/sys interface. - -To allow for a uniform interface between the legacy RTC and EFI time service, -we have created the include/linux/rtc.h header file to contain only the -"public" API of the two drivers. The specifics of the legacy RTC are still -in include/linux/mc146818rtc.h. - - -3. Time of day service -====================== - -The part of the driver gives access to the time of day service of EFI. 
-Two ioctl()s, compatible with the legacy RTC calls: - - Read the CMOS clock:: - - ioctl(d, RTC_RD_TIME, &rtc); - - Write the CMOS clock:: - - ioctl(d, RTC_SET_TIME, &rtc); - -The rtc is a pointer to a data structure defined in rtc.h which is close -to a struct tm:: - - struct rtc_time { - int tm_sec; - int tm_min; - int tm_hour; - int tm_mday; - int tm_mon; - int tm_year; - int tm_wday; - int tm_yday; - int tm_isdst; - }; - -The driver takes care of converting back an forth between the EFI time and -this format. - -Those two ioctl()s can be exercised with the hwclock command: - -For reading:: - - # /sbin/hwclock --show - Mon Mar 6 15:32:32 2000 -0.910248 seconds - -For setting:: - - # /sbin/hwclock --systohc - -Root privileges are required to be able to set the time of day. - -4. Wakeup Alarm service -======================= - -EFI provides an API by which one can program when a machine should wakeup, -i.e. reboot. This is very different from the alarm provided by the legacy -RTC which is some kind of interval timer alarm. For this reason we don't use -the same ioctl()s to get access to the service. Instead we have -introduced 2 news ioctl()s to the interface of an RTC. - -We have added 2 new ioctl()s that are specific to the EFI driver: - - Read the current state of the alarm:: - - ioctl(d, RTC_WKALM_RD, &wkt) - - Set the alarm or change its status:: - - ioctl(d, RTC_WKALM_SET, &wkt) - -The wkt structure encapsulates a struct rtc_time + 2 extra fields to get -status information:: - - struct rtc_wkalrm { - - unsigned char enabled; /* =1 if alarm is enabled */ - unsigned char pending; /* =1 if alarm is pending */ - - struct rtc_time time; - } - -As of today, none of the existing user-level apps supports this feature. -However writing such a program should be hard by simply using those two -ioctl(). - -Root privileges are required to be able to set the alarm. - -5. 
References -============= - -Checkout the following Web site for more information on EFI: - -http://developer.intel.com/technology/efi/ diff --git a/Documentation/arch/ia64/err_inject.rst b/Documentation/arch/ia64/err_inject.rst deleted file mode 100644 index 900f71e93a..0000000000 --- a/Documentation/arch/ia64/err_inject.rst +++ /dev/null @@ -1,1067 +0,0 @@ -======================================== -IPF Machine Check (MC) error inject tool -======================================== - -IPF Machine Check (MC) error inject tool is used to inject MC -errors from Linux. The tool is a test bed for IPF MC work flow including -hardware correctable error handling, OS recoverable error handling, MC -event logging, etc. - -The tool includes two parts: a kernel driver and a user application -sample. The driver provides interface to PAL to inject error -and query error injection capabilities. The driver code is in -arch/ia64/kernel/err_inject.c. The application sample (shown below) -provides a combination of various errors and calls the driver's interface -(sysfs interface) to inject errors or query error injection capabilities. - -The tool can be used to test Intel IPF machine MC handling capabilities. -It's especially useful for people who can not access hardware MC injection -tool to inject error. It's also very useful to integrate with other -software test suits to do stressful testing on IPF. - -Below is a sample application as part of the whole tool. The sample -can be used as a working test tool. Or it can be expanded to include -more features. It also can be a integrated into a library or other user -application to have more thorough test. - -The sample application takes err.conf as error configuration input. GCC -compiles the code. After you install err_inject driver, you can run -this sample application to inject errors. - -Errata: Itanium 2 Processors Specification Update lists some errata against -the pal_mc_error_inject PAL procedure. 
The following err.conf has been tested -on latest Montecito PAL. - -err.conf:: - - #This is configuration file for err_inject_tool. - #The format of the each line is: - #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer - #where - # cpu: logical cpu number the error will be inject in. - # loop: times the error will be injected. - # interval: In second. every so often one error is injected. - # err_type_info, err_struct_info: PAL parameters. - # - #Note: All values are hex w/o or w/ 0x prefix. - - - #On cpu2, inject only total 0x10 errors, interval 5 seconds - #corrected, data cache, hier-2, physical addr(assigned by tool code). - #working on Montecito latest PAL. - 2, 10, 5, 4101, 95 - - #On cpu4, inject and consume total 0x10 errors, interval 5 seconds - #corrected, data cache, hier-2, physical addr(assigned by tool code). - #working on Montecito latest PAL. - 4, 10, 5, 4109, 95 - - #On cpu15, inject and consume total 0x10 errors, interval 5 seconds - #recoverable, DTR0, hier-2. - #working on Montecito latest PAL. - 0xf, 0x10, 5, 4249, 15 - -The sample application source code: - -err_injection_tool.c:: - - /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Copyright (C) 2006 Intel Co - * Fenghua Yu - * - */ - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - - #define MAX_FN_SIZE 256 - #define MAX_BUF_SIZE 256 - #define DATA_BUF_SIZE 256 - #define NR_CPUS 512 - #define MAX_TASK_NUM 2048 - #define MIN_INTERVAL 5 // seconds - #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. - #define PARA_FIELD_NUM 5 - #define MASK_SIZE (NR_CPUS/64) - #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" - - int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); - - int verbose; - #define vbprintf if (verbose) printf - - int log_info(int cpu, const char *fmt, ...) - { - FILE *log; - char fn[MAX_FN_SIZE]; - char buf[MAX_BUF_SIZE]; - va_list args; - - sprintf(fn, "%d.log", cpu); - log=fopen(fn, "a+"); - if (log==NULL) { - perror("Error open:"); - return -1; - } - - va_start(args, fmt); - vprintf(fmt, args); - memset(buf, 0, MAX_BUF_SIZE); - vsprintf(buf, fmt, args); - va_end(args); - - fwrite(buf, sizeof(buf), 1, log); - fclose(log); - - return 0; - } - - typedef unsigned long u64; - typedef unsigned int u32; - - typedef union err_type_info_u { - struct { - u64 mode : 3, /* 0-2 */ - err_inj : 3, /* 3-5 */ - err_sev : 2, /* 6-7 */ - err_struct : 5, /* 8-12 */ - struct_hier : 3, /* 13-15 */ - reserved : 48; /* 16-63 */ - } err_type_info_u; - u64 err_type_info; - } err_type_info_t; - - typedef union err_struct_info_u { - struct { - u64 siv : 1, /* 0 */ - c_t : 2, /* 1-2 */ - cl_p : 3, /* 3-5 */ - cl_id : 3, /* 6-8 */ - cl_dp : 1, /* 9 */ - reserved1 : 22, /* 10-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_cache; - struct { - u64 siv : 1, /* 0 */ - tt : 2, /* 1-2 */ - tc_tr : 2, /* 3-4 */ - tr_slot : 8, /* 5-12 */ - reserved1 : 19, /* 13-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - 
trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_tlb; - struct { - u64 siv : 1, /* 0 */ - regfile_id : 4, /* 1-4 */ - reg_num : 7, /* 5-11 */ - reserved1 : 20, /* 12-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_register; - struct { - u64 reserved; - } err_struct_info_bus_processor_interconnect; - u64 err_struct_info; - } err_struct_info_t; - - typedef union err_data_buffer_u { - struct { - u64 trigger_addr; /* 0-63 */ - u64 inj_addr; /* 64-127 */ - u64 way : 5, /* 128-132 */ - index : 20, /* 133-152 */ - : 39; /* 153-191 */ - } err_data_buffer_cache; - struct { - u64 trigger_addr; /* 0-63 */ - u64 inj_addr; /* 64-127 */ - u64 way : 5, /* 128-132 */ - index : 20, /* 133-152 */ - reserved : 39; /* 153-191 */ - } err_data_buffer_tlb; - struct { - u64 trigger_addr; /* 0-63 */ - } err_data_buffer_register; - struct { - u64 reserved; /* 0-63 */ - } err_data_buffer_bus_processor_interconnect; - u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; - } err_data_buffer_t; - - typedef union capabilities_u { - struct { - u64 i : 1, - d : 1, - rv : 1, - tag : 1, - data : 1, - mesi : 1, - dp : 1, - reserved1 : 3, - pa : 1, - va : 1, - wi : 1, - reserved2 : 20, - trigger : 1, - trigger_pl : 1, - reserved3 : 30; - } capabilities_cache; - struct { - u64 d : 1, - i : 1, - rv : 1, - tc : 1, - tr : 1, - reserved1 : 27, - trigger : 1, - trigger_pl : 1, - reserved2 : 30; - } capabilities_tlb; - struct { - u64 gr_b0 : 1, - gr_b1 : 1, - fr : 1, - br : 1, - pr : 1, - ar : 1, - cr : 1, - rr : 1, - pkr : 1, - dbr : 1, - ibr : 1, - pmc : 1, - pmd : 1, - reserved1 : 3, - regnum : 1, - reserved2 : 15, - trigger : 1, - trigger_pl : 1, - reserved3 : 30; - } capabilities_register; - struct { - u64 reserved; - } capabilities_bus_processor_interconnect; - } capabilities_t; - - typedef struct resources_s { - u64 ibr0 : 1, - ibr2 : 1, - ibr4 : 1, - ibr6 : 1, - dbr0 : 1, - dbr2 : 1, - dbr4 : 1, - 
dbr6 : 1, - reserved : 48; - } resources_t; - - - long get_page_size(void) - { - long page_size=sysconf(_SC_PAGESIZE); - return page_size; - } - - #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) - #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) - #define SHM_VA 0x2000000100000000 - - int shmid; - void *shmaddr; - - int create_shm(void) - { - key_t key; - char fn[MAX_FN_SIZE]; - - /* cpu0 is always existing */ - sprintf(fn, PATH_FORMAT, 0); - if ((key = ftok(fn, 's')) == -1) { - perror("ftok"); - return -1; - } - - shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); - if (shmid == -1) { - if (errno==EEXIST) { - shmid = shmget(key, SHM_SIZE, 0); - if (shmid == -1) { - perror("shmget"); - return -1; - } - } - else { - perror("shmget"); - return -1; - } - } - vbprintf("shmid=%d", shmid); - - /* connect to the segment: */ - shmaddr = shmat(shmid, (void *)SHM_VA, 0); - if (shmaddr == (void*)-1) { - perror("shmat"); - return -1; - } - - memset(shmaddr, 0, SHM_SIZE); - mlock(shmaddr, SHM_SIZE); - - return 0; - } - - int free_shm() - { - munlock(shmaddr, SHM_SIZE); - shmdt(shmaddr); - semctl(shmid, 0, IPC_RMID); - - return 0; - } - - #ifdef _SEM_SEMUN_UNDEFINED - union semun - { - int val; - struct semid_ds *buf; - unsigned short int *array; - struct seminfo *__buf; - }; - #endif - - u32 mode=1; /* 1: physical mode; 2: virtual mode. 
*/ - int one_lock=1; - key_t key[NR_CPUS]; - int semid[NR_CPUS]; - - int create_sem(int cpu) - { - union semun arg; - char fn[MAX_FN_SIZE]; - int sid; - - sprintf(fn, PATH_FORMAT, cpu); - sprintf(fn, "%s/%s", fn, "err_type_info"); - if ((key[cpu] = ftok(fn, 'e')) == -1) { - perror("ftok"); - return -1; - } - - if (semid[cpu]!=0) - return 0; - - /* clear old semaphore */ - if ((sid = semget(key[cpu], 1, 0)) != -1) - semctl(sid, 0, IPC_RMID); - - /* get one semaphore */ - if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { - perror("semget"); - printf("Please remove semaphore with key=0x%lx, then run the tool.\n", - (u64)key[cpu]); - return -1; - } - - vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, - (u64)key[cpu]); - /* initialize the semaphore to 1: */ - arg.val = 1; - if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { - perror("semctl"); - return -1; - } - - return 0; - } - - static int lock(int cpu) - { - struct sembuf lock; - - lock.sem_num = cpu; - lock.sem_op = 1; - semop(semid[cpu], &lock, 1); - - return 0; - } - - static int unlock(int cpu) - { - struct sembuf unlock; - - unlock.sem_num = cpu; - unlock.sem_op = -1; - semop(semid[cpu], &unlock, 1); - - return 0; - } - - void free_sem(int cpu) - { - semctl(semid[cpu], 0, IPC_RMID); - } - - int wr_multi(char *fn, unsigned long *data, int size) - { - int fd; - char buf[MAX_BUF_SIZE]; - int ret; - - if (size==1) - sprintf(buf, "%lx", *data); - else if (size==3) - sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); - else { - fprintf(stderr,"write to file with wrong size!\n"); - return -1; - } - - fd=open(fn, O_RDWR); - if (!fd) { - perror("Error:"); - return -1; - } - ret=write(fd, buf, sizeof(buf)); - close(fd); - return ret; - } - - int wr(char *fn, unsigned long data) - { - return wr_multi(fn, &data, 1); - } - - int rd(char *fn, unsigned long *data) - { - int fd; - char buf[MAX_BUF_SIZE]; - - fd=open(fn, O_RDONLY); - if (fd<0) { - perror("Error:"); - return -1; - } - 
read(fd, buf, MAX_BUF_SIZE); - *data=strtoul(buf, NULL, 16); - close(fd); - return 0; - } - - int rd_status(char *path, int *status) - { - char fn[MAX_FN_SIZE]; - sprintf(fn, "%s/status", path); - if (rd(fn, (u64*)status)<0) { - perror("status reading error.\n"); - return -1; - } - - return 0; - } - - int rd_capabilities(char *path, u64 *capabilities) - { - char fn[MAX_FN_SIZE]; - sprintf(fn, "%s/capabilities", path); - if (rd(fn, capabilities)<0) { - perror("capabilities reading error.\n"); - return -1; - } - - return 0; - } - - int rd_all(char *path) - { - unsigned long err_type_info, err_struct_info, err_data_buffer; - int status; - unsigned long capabilities, resources; - char fn[MAX_FN_SIZE]; - - sprintf(fn, "%s/err_type_info", path); - if (rd(fn, &err_type_info)<0) { - perror("err_type_info reading error.\n"); - return -1; - } - printf("err_type_info=%lx\n", err_type_info); - - sprintf(fn, "%s/err_struct_info", path); - if (rd(fn, &err_struct_info)<0) { - perror("err_struct_info reading error.\n"); - return -1; - } - printf("err_struct_info=%lx\n", err_struct_info); - - sprintf(fn, "%s/err_data_buffer", path); - if (rd(fn, &err_data_buffer)<0) { - perror("err_data_buffer reading error.\n"); - return -1; - } - printf("err_data_buffer=%lx\n", err_data_buffer); - - sprintf(fn, "%s/status", path); - if (rd("status", (u64*)&status)<0) { - perror("status reading error.\n"); - return -1; - } - printf("status=%d\n", status); - - sprintf(fn, "%s/capabilities", path); - if (rd(fn,&capabilities)<0) { - perror("capabilities reading error.\n"); - return -1; - } - printf("capabilities=%lx\n", capabilities); - - sprintf(fn, "%s/resources", path); - if (rd(fn, &resources)<0) { - perror("resources reading error.\n"); - return -1; - } - printf("resources=%lx\n", resources); - - return 0; - } - - int query_capabilities(char *path, err_type_info_t err_type_info, - u64 *capabilities) - { - char fn[MAX_FN_SIZE]; - err_struct_info_t err_struct_info; - err_data_buffer_t 
err_data_buffer; - - err_struct_info.err_struct_info=0; - memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); - - sprintf(fn, "%s/err_type_info", path); - wr(fn, err_type_info.err_type_info); - sprintf(fn, "%s/err_struct_info", path); - wr(fn, 0x0); - sprintf(fn, "%s/err_data_buffer", path); - wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); - - // Fire pal_mc_error_inject procedure. - sprintf(fn, "%s/call_start", path); - wr(fn, mode); - - if (rd_capabilities(path, capabilities)<0) - return -1; - - return 0; - } - - int query_all_capabilities() - { - int status; - err_type_info_t err_type_info; - int err_sev, err_struct, struct_hier; - int cap=0; - u64 capabilities; - char path[MAX_FN_SIZE]; - - err_type_info.err_type_info=0; // Initial - err_type_info.err_type_info_u.mode=0; // Query mode; - err_type_info.err_type_info_u.err_inj=0; - - printf("All capabilities implemented in pal_mc_error_inject:\n"); - sprintf(path, PATH_FORMAT ,0); - for (err_sev=0;err_sev<3;err_sev++) - for (err_struct=0;err_struct<5;err_struct++) - for (struct_hier=0;struct_hier<5;struct_hier++) - { - status=-1; - capabilities=0; - err_type_info.err_type_info_u.err_sev=err_sev; - err_type_info.err_type_info_u.err_struct=err_struct; - err_type_info.err_type_info_u.struct_hier=struct_hier; - - if (query_capabilities(path, err_type_info, &capabilities)<0) - continue; - - if (rd_status(path, &status)<0) - continue; - - if (status==0) { - cap=1; - printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ", - err_sev, err_struct, struct_hier); - printf("capabilities 0x%lx\n", capabilities); - } - } - if (!cap) { - printf("No capabilities supported.\n"); - return 0; - } - - return 0; - } - - int err_inject(int cpu, char *path, err_type_info_t err_type_info, - err_struct_info_t err_struct_info, - err_data_buffer_t err_data_buffer) - { - int status; - char fn[MAX_FN_SIZE]; - - log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", - err_type_info.err_type_info, - 
err_struct_info.err_struct_info); - log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", - err_data_buffer.err_data_buffer[0], - err_data_buffer.err_data_buffer[1], - err_data_buffer.err_data_buffer[2]); - sprintf(fn, "%s/err_type_info", path); - wr(fn, err_type_info.err_type_info); - sprintf(fn, "%s/err_struct_info", path); - wr(fn, err_struct_info.err_struct_info); - sprintf(fn, "%s/err_data_buffer", path); - wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); - - // Fire pal_mc_error_inject procedure. - sprintf(fn, "%s/call_start", path); - wr(fn,mode); - - if (rd_status(path, &status)<0) { - vbprintf("fail: read status\n"); - return -100; - } - - if (status!=0) { - log_info(cpu, "fail: status=%d\n", status); - return status; - } - - return status; - } - - static int construct_data_buf(char *path, err_type_info_t err_type_info, - err_struct_info_t err_struct_info, - err_data_buffer_t *err_data_buffer, - void *va1) - { - char fn[MAX_FN_SIZE]; - u64 virt_addr=0, phys_addr=0; - - vbprintf("va1=%lx\n", (u64)va1); - memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); - - switch (err_type_info.err_type_info_u.err_struct) { - case 1: // Cache - switch (err_struct_info.err_struct_info_cache.cl_id) { - case 1: //Virtual addr - err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; - break; - case 2: //Phys addr - sprintf(fn, "%s/virtual_to_phys", path); - virt_addr=(u64)va1; - if (wr(fn,virt_addr)<0) - return -1; - rd(fn, &phys_addr); - err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; - break; - default: - printf("Not supported cl_id\n"); - break; - } - break; - case 2: // TLB - break; - case 3: // Register file - break; - case 4: // Bus/system interconnect - default: - printf("Not supported err_struct\n"); - break; - } - - return 0; - } - - typedef struct { - u64 cpu; - u64 loop; - u64 interval; - u64 err_type_info; - u64 err_struct_info; - u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; - } parameters_t; - - parameters_t 
line_para; - int para; - - static int empty_data_buffer(u64 *err_data_buffer) - { - int empty=1; - int i; - - for (i=0;iMIN_INTERVAL - ?interval:MIN_INTERVAL; - parameters[num].err_type_info=err_type_info_conf; - parameters[num].err_struct_info=err_struct_info_conf; - memcpy(parameters[num++].err_data_buffer, - err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; - - if (num>=MAX_TASK_NUM) - break; - } - } - else { - parameters[0].cpu=line_para.cpu; - parameters[0].loop=line_para.loop; - parameters[0].interval= line_para.interval>MIN_INTERVAL - ?line_para.interval:MIN_INTERVAL; - parameters[0].err_type_info=line_para.err_type_info; - parameters[0].err_struct_info=line_para.err_struct_info; - memcpy(parameters[0].err_data_buffer, - line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; - - num=1; - } - - /* Create semaphore: If one_lock, one semaphore for all processors. - Otherwise, one semaphore for each processor. */ - if (one_lock) { - if (create_sem(0)) { - printf("Can not create semaphore...exit\n"); - free_sem(0); - return -1; - } - } - else { - for (i=0;i - -Using the "epc" instruction effectively introduces a new mode of -execution to the ia64 linux kernel. We call this mode the -"fsys-mode". To recap, the normal states of execution are: - - - kernel mode: - Both the register stack and the memory stack have been - switched over to kernel memory. The user-level state is saved - in a pt-regs structure at the top of the kernel memory stack. - - - user mode: - Both the register stack and the kernel stack are in - user memory. The user-level state is contained in the - CPU registers. - - - bank 0 interruption-handling mode: - This is the non-interruptible state which all - interruption-handlers start execution in. The user-level - state remains in the CPU registers and some kernel state may - be stored in bank 0 of registers r16-r31. 
- -In contrast, fsys-mode has the following special properties: - - - execution is at privilege level 0 (most-privileged) - - - CPU registers may contain a mixture of user-level and kernel-level - state (it is the responsibility of the kernel to ensure that no - security-sensitive kernel-level state is leaked back to - user-level) - - - execution is interruptible and preemptible (an fsys-mode handler - can disable interrupts and avoid all other interruption-sources - to avoid preemption) - - - neither the memory-stack nor the register-stack can be trusted while - in fsys-mode (they point to the user-level stacks, which may - be invalid, or completely bogus addresses) - -In summary, fsys-mode is much more similar to running in user-mode -than it is to running in kernel-mode. Of course, given that the -privilege level is at level 0, this means that fsys-mode requires some -care (see below). - - -How to tell fsys-mode -===================== - -Linux operates in fsys-mode when (a) the privilege level is 0 (most -privileged) and (b) the stacks have NOT been switched to kernel memory -yet. For convenience, the header file <asm/ptrace.h> provides -three macros:: - - user_mode(regs) - user_stack(task,regs) - fsys_mode(task,regs) - -The "regs" argument is a pointer to a pt_regs structure. The "task" -argument is a pointer to the task structure to which the "regs" -pointer belongs to. user_mode() returns TRUE if the CPU state pointed -to by "regs" was executing in user mode (privilege level 3). -user_stack() returns TRUE if the state pointed to by "regs" was -executing on the user-level stack(s). Finally, fsys_mode() returns -TRUE if the CPU state pointed to by "regs" was executing in fsys-mode. -The fsys_mode() macro is equivalent to the expression:: - - !user_mode(regs) && user_stack(task,regs) - -How to write an fsyscall handler -================================ - -The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers -(fsyscall_table).
This table contains one entry for each system call. -By default, a system call is handled by fsys_fallback_syscall(). This -routine takes care of entering (full) kernel mode and calling the -normal Linux system call handler. For performance-critical system -calls, it is possible to write a hand-tuned fsyscall_handler. For -example, fsys.S contains fsys_getpid(), which is a hand-tuned version -of the getpid() system call. - -The entry and exit-state of an fsyscall handler is as follows: - -Machine state on entry to fsyscall handler ------------------------------------------- - - ========= =============================================================== - r10 0 - r11 saved ar.pfs (a user-level value) - r15 system call number - r16 "current" task pointer (in normal kernel-mode, this is in r13) - r32-r39 system call arguments - b6 return address (a user-level value) - ar.pfs previous frame-state (a user-level value) - PSR.be cleared to zero (i.e., little-endian byte order is in effect) - - all other registers may contain values passed in from user-mode - ========= =============================================================== - -Required machine state on exit to fsyscall handler --------------------------------------------------- - - ========= =========================================================== - r11 saved ar.pfs (as passed into the fsyscall handler) - r15 system call number (as passed into the fsyscall handler) - r32-r39 system call arguments (as passed into the fsyscall handler) - b6 return address (as passed into the fsyscall handler) - ar.pfs previous frame-state (as passed into the fsyscall handler) - ========= =========================================================== - -Fsyscall handlers can execute with very little overhead, but with that -speed comes a set of restrictions: - - * Fsyscall-handlers MUST check for any pending work in the flags - member of the thread-info structure and if any of the - TIF_ALLWORK_MASK flags are set, the handler needs to 
fall back on - doing a full system call (by calling fsys_fallback_syscall). - - * Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11, - r15, b6, and ar.pfs) because they will be needed in case of a - system call restart. Of course, all "preserved" registers also - must be preserved, in accordance to the normal calling conventions. - - * Fsyscall-handlers MUST check argument registers for containing a - NaT value before using them in any way that could trigger a - NaT-consumption fault. If a system call argument is found to - contain a NaT value, an fsyscall-handler may return immediately - with r8=EINVAL, r10=-1. - - * Fsyscall-handlers MUST NOT use the "alloc" instruction or perform - any other operation that would trigger mandatory RSE - (register-stack engine) traffic. - - * Fsyscall-handlers MUST NOT write to any stacked registers because - it is not safe to assume that user-level called a handler with the - proper number of arguments. - - * Fsyscall-handlers need to be careful when accessing per-CPU variables: - unless proper safe-guards are taken (e.g., interruptions are avoided), - execution may be pre-empted and resumed on another CPU at any given - time. - - * Fsyscall-handlers must be careful not to leak sensitive kernel' - information back to user-level. In particular, before returning to - user-level, care needs to be taken to clear any scratch registers - that could contain sensitive information (note that the current - task pointer is not considered sensitive: it's already exposed - through ar.k6). - - * Fsyscall-handlers MUST NOT access user-memory without first - validating access-permission (this can be done typically via - probe.r.fault and/or probe.w.fault) and without guarding against - memory access exceptions (this can be done with the EX() macros - defined by asmmacro.h). - -The above restrictions may seem draconian, but remember that it's -possible to trade off some of the restrictions by paying a slightly -higher overhead. 
For example, if an fsyscall-handler could benefit -from the shadow register bank, it could temporarily disable PSR.i and -PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as -needed. In other words, following the above rules yields extremely -fast system call execution (while fully preserving system call -semantics), but there is also a lot of flexibility in handling more -complicated cases. - -Signal handling -=============== - -The delivery of (asynchronous) signals must be delayed until fsys-mode -is exited. This is accomplished with the help of the lower-privilege -transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user() -checks whether the interrupted task was in fsys-mode and, if so, sets -PSR.lp and returns immediately. When fsys-mode is exited via the -"br.ret" instruction that lowers the privilege level, a trap will -occur. The trap handler clears PSR.lp again and returns immediately. -The kernel exit path then checks for and delivers any pending signals. - -PSR Handling -============ - -The "epc" instruction doesn't change the contents of PSR at all. This -is in contrast to a regular interruption, which clears almost all -bits. Because of that, some care needs to be taken to ensure things -work as expected. The following discussion describes how each PSR bit -is handled. - -======= ======================================================================= -PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used - to ensure the CPU is in little-endian mode before the first - load/store instruction is executed. PSR.be is normally NOT - restored upon return from an fsys-mode handler. In other - words, user-level code must not rely on PSR.be being preserved - across a system call. -PSR.up Unchanged. -PSR.ac Unchanged. -PSR.mfl Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.mfh Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.ic Unchanged. 
Note: fsys-mode handlers can clear the bit, if needed. -PSR.i Unchanged. Note: fsys-mode handlers can clear the bit, if needed. -PSR.pk Unchanged. -PSR.dt Unchanged. -PSR.dfl Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.dfh Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.sp Unchanged. -PSR.pp Unchanged. -PSR.di Unchanged. -PSR.si Unchanged. -PSR.db Unchanged. The kernel prevents user-level from setting a hardware - breakpoint that triggers at any privilege level other than - 3 (user-mode). -PSR.lp Unchanged. -PSR.tb Lazy redirect. If a taken-branch trap occurs while in - fsys-mode, the trap-handler modifies the saved machine state - such that execution resumes in the gate page at - syscall_via_break(), with privilege level 3. Note: the - taken branch would occur on the branch invoking the - fsyscall-handler, at which point, by definition, a syscall - restart is still safe. If the system call number is invalid, - the fsys-mode handler will return directly to user-level. This - return will trigger a taken-branch trap, but since the trap is - taken _after_ restoring the privilege level, the CPU has already - left fsys-mode, so no special treatment is needed. -PSR.rt Unchanged. -PSR.cpl Cleared to 0. -PSR.is Unchanged (guaranteed to be 0 on entry to the gate page). -PSR.mc Unchanged. -PSR.it Unchanged (guaranteed to be 1). -PSR.id Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.da Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.dd Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.ss Lazy redirect. If set, "epc" will cause a Single Step Trap to - be taken. The trap handler then modifies the saved machine - state such that execution resumes in the gate page at - syscall_via_break(), with privilege level 3. -PSR.ri Unchanged. -PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode - handler performed a speculative load that gets NaTted. 
If so, this - would be the normal & expected behavior, so no special treatment is - needed. -PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed. - Doing so requires clearing PSR.i and PSR.ic as well. -PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit. -======= ======================================================================= - -Using fast system calls -======================= - -To use fast system calls, userspace applications need simply call -__kernel_syscall_via_epc(). For example - --- example fgettimeofday() call -- - --- fgettimeofday.S -- - -:: - - #include <asm/asmmacro.h> - - GLOBAL_ENTRY(fgettimeofday) - .prologue - .save ar.pfs, r11 - mov r11 = ar.pfs - .body - - mov r2 = 0xa000000000020660;; // gate address - // found by inspection of System.map for the - // __kernel_syscall_via_epc() function. See - // below for how to do this for real. - - mov b7 = r2 - mov r15 = 1087 // gettimeofday syscall - ;; - br.call.sptk.many b6 = b7 - ;; - - .restore sp - - mov ar.pfs = r11 - br.ret.sptk.many rp;; // return to caller - END(fgettimeofday) - --- end fgettimeofday.S -- - -In reality, getting the gate address is accomplished by two extra -values passed via the ELF auxiliary vector (include/asm-ia64/elf.h) - - * AT_SYSINFO : is the address of __kernel_syscall_via_epc() - * AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO - -The ELF DSO is a pre-linked library that is mapped in by the kernel at -the gate page. It is a proper ELF shared object so, with a dynamic -loader that recognises the library, you should be able to make calls to -the exported functions within it as with any other shared library. -AT_SYSINFO points into the kernel DSO at the -__kernel_syscall_via_epc() function for historical reasons (it was -used before the kernel DSO) and as a convenience.
diff --git a/Documentation/arch/ia64/ia64.rst b/Documentation/arch/ia64/ia64.rst deleted file mode 100644 index b725019a94..0000000000 --- a/Documentation/arch/ia64/ia64.rst +++ /dev/null @@ -1,49 +0,0 @@ -=========================================== -Linux kernel release for the IA-64 Platform -=========================================== - - These are the release notes for Linux since version 2.4 for IA-64 - platform. This document provides information specific to IA-64 - ONLY, to get additional information about the Linux kernel also - read the original Linux README provided with the kernel. - -Installing the Kernel -===================== - - - IA-64 kernel installation is the same as the other platforms, see - original README for details. - - -Software Requirements -===================== - - Compiling and running this kernel requires an IA-64 compliant GCC - compiler. And various software packages also compiled with an - IA-64 compliant GCC compiler. - - -Configuring the Kernel -====================== - - Configuration is the same, see original README for details. - - -Compiling the Kernel: - - - Compiling this kernel doesn't differ from other platform so read - the original README for details BUT make sure you have an IA-64 - compliant GCC compiler. - -IA-64 Specifics -=============== - - - General issues: - - * Hardly any performance tuning has been done. Obvious targets - include the library routines (IP checksum, etc.). Less - obvious targets include making sure we don't flush the TLB - needlessly, etc. - - * SMP locks cleanup/optimization - - * IA32 support. Currently experimental. It mostly works. diff --git a/Documentation/arch/ia64/index.rst b/Documentation/arch/ia64/index.rst deleted file mode 100644 index 761f2154df..0000000000 --- a/Documentation/arch/ia64/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -================== -IA-64 Architecture -================== - -.. 
toctree:: - :maxdepth: 1 - - ia64 - aliasing - efirtc - err_inject - fsys - irq-redir - mca - serial - - features diff --git a/Documentation/arch/ia64/irq-redir.rst b/Documentation/arch/ia64/irq-redir.rst deleted file mode 100644 index 6bbbbe4f73..0000000000 --- a/Documentation/arch/ia64/irq-redir.rst +++ /dev/null @@ -1,80 +0,0 @@ -============================== -IRQ affinity on IA64 platforms -============================== - -07.01.2002, Erich Focht - - -By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be -controlled. The behavior on IA64 platforms is slightly different from -that described in Documentation/core-api/irq/irq-affinity.rst for i386 systems. - -Because of the usage of SAPIC mode and physical destination mode the -IRQ target is one particular CPU and cannot be a mask of several -CPUs. Only the first non-zero bit is taken into account. - - -Usage examples -============== - -The target CPU has to be specified as a hexadecimal CPU mask. The -first non-zero bit is the selected CPU. This format has been kept for -compatibility reasons with i386. - -Set the delivery mode of interrupt 41 to fixed and route the -interrupts to CPU #3 (logical CPU number) (2^3=0x08):: - - echo "8" >/proc/irq/41/smp_affinity - -Set the default route for IRQ number 41 to CPU 6 in lowest priority -delivery mode (redirectable):: - - echo "r 40" >/proc/irq/41/smp_affinity - -The output of the command:: - - cat /proc/irq/IRQ#/smp_affinity - -gives the target CPU mask for the specified interrupt vector. If the CPU -mask is preceded by the character "r", the interrupt is redirectable -(i.e. lowest priority mode routing is used), otherwise its route is -fixed. 
- - - -Initialization and default behavior -=================================== - -If the platform features IRQ redirection (info provided by SAL) all -IO-SAPIC interrupts are initialized with CPU#0 as their default target -and the routing is the so called "lowest priority mode" (actually -fixed SAPIC mode with hint). The XTP chipset registers are used as hints -for the IRQ routing. Currently in Linux XTP registers can have three -values: - - - minimal for an idle task, - - normal if any other task runs, - - maximal if the CPU is going to be switched off. - -The IRQ is routed to the CPU with lowest XTP register value, the -search begins at the default CPU. Therefore most of the interrupts -will be handled by CPU #0. - -If the platform doesn't feature interrupt redirection IOSAPIC fixed -routing is used. The target CPUs are distributed in a round robin -manner. IRQs will be routed only to the selected target CPUs. Check -with:: - - cat /proc/interrupts - - - -Comments -======== - -On large (multi-node) systems it is recommended to route the IRQs to -the node to which the corresponding device is connected. -For systems like the NEC AzusA we get IRQ node-affinity for free. This -is because usually the chipsets on each node redirect the interrupts -only to their own CPUs (as they cannot see the XTP registers on the -other nodes). diff --git a/Documentation/arch/ia64/mca.rst b/Documentation/arch/ia64/mca.rst deleted file mode 100644 index 08270bba44..0000000000 --- a/Documentation/arch/ia64/mca.rst +++ /dev/null @@ -1,198 +0,0 @@ -============================================================= -An ad-hoc collection of notes on IA64 MCA and INIT processing -============================================================= - -Feel free to update it with notes about any area that is not clear. - ---- - -MCA/INIT are completely asynchronous. They can occur at any time, when -the OS is in any state. Including when one of the cpus is already -holding a spinlock. 
Trying to get any lock from MCA/INIT state is -asking for deadlock. Also the state of structures that are protected -by locks is indeterminate, including linked lists. - ---- - -The complicated ia64 MCA process. All of this is mandated by Intel's -specification for ia64 SAL, error recovery and unwind, it is not as -if we have a choice here. - -* MCA occurs on one cpu, usually due to a double bit memory error. - This is the monarch cpu. - -* SAL sends an MCA rendezvous interrupt (which is a normal interrupt) - to all the other cpus, the slaves. - -* Slave cpus that receive the MCA interrupt call down into SAL, they - end up spinning disabled while the MCA is being serviced. - -* If any slave cpu was already spinning disabled when the MCA occurred - then it cannot service the MCA interrupt. SAL waits ~20 seconds then - sends an unmaskable INIT event to the slave cpus that have not - already rendezvoused. - -* Because MCA/INIT can be delivered at any time, including when the cpu - is down in PAL in physical mode, the registers at the time of the - event are _completely_ undefined. In particular the MCA/INIT - handlers cannot rely on the thread pointer, PAL physical mode can - (and does) modify TP. It is allowed to do that as long as it resets - TP on return. However MCA/INIT events expose us to these PAL - internal TP changes. Hence curr_task(). - -* If an MCA/INIT event occurs while the kernel was running (not user - space) and the kernel has called PAL then the MCA/INIT handler cannot - assume that the kernel stack is in a fit state to be used. Mainly - because PAL may or may not maintain the stack pointer internally. - Because the MCA/INIT handlers cannot trust the kernel stack, they - have to use their own, per-cpu stacks. The MCA/INIT stacks are - preformatted with just enough task state to let the relevant handlers - do their job. - -* Unlike most other architectures, the ia64 struct task is embedded in - the kernel stack[1]. 
So switching to a new kernel stack means that - we switch to a new task as well. Because various bits of the kernel - assume that current points into the struct task, switching to a new - stack also means a new value for current. - -* Once all slaves have rendezvoused and are spinning disabled, the - monarch is entered. The monarch now tries to diagnose the problem - and decide if it can recover or not. - -* Part of the monarch's job is to look at the state of all the other - tasks. The only way to do that on ia64 is to call the unwinder, - as mandated by Intel. - -* The starting point for the unwind depends on whether a task is - running or not. That is, whether it is on a cpu or is blocked. The - monarch has to determine whether or not a task is on a cpu before it - knows how to start unwinding it. The tasks that received an MCA or - INIT event are no longer running, they have been converted to blocked - tasks. But (and its a big but), the cpus that received the MCA - rendezvous interrupt are still running on their normal kernel stacks! - -* To distinguish between these two cases, the monarch must know which - tasks are on a cpu and which are not. Hence each slave cpu that - switches to an MCA/INIT stack, registers its new stack using - set_curr_task(), so the monarch can tell that the _original_ task is - no longer running on that cpu. That gives us a decent chance of - getting a valid backtrace of the _original_ task. - -* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a - nested error, we want diagnostics on the MCA/INIT handler that - failed, not on the task that was originally running. Again this - requires set_curr_task() so the MCA/INIT handlers can register their - own stack as running on that cpu. Then a recursive error gets a - trace of the failing handler's "task". - -[1] - My (Keith Owens) original design called for ia64 to separate its - struct task and the kernel stacks. 
Then the MCA/INIT data would be - chained stacks like i386 interrupt stacks. But that required - radical surgery on the rest of ia64, plus extra hard wired TLB - entries with its associated performance degradation. David - Mosberger vetoed that approach. Which meant that separate kernel - stacks meant separate "tasks" for the MCA/INIT handlers. - ---- - -INIT is less complicated than MCA. Pressing the nmi button or using -the equivalent command on the management console sends INIT to all -cpus. SAL picks one of the cpus as the monarch and the rest are -slaves. All the OS INIT handlers are entered at approximately the same -time. The OS monarch prints the state of all tasks and returns, after -which the slaves return and the system resumes. - -At least that is what is supposed to happen. Alas there are broken -versions of SAL out there. Some drive all the cpus as monarchs. Some -drive them all as slaves. Some drive one cpu as monarch, wait for that -cpu to return from the OS then drive the rest as slaves. Some versions -of SAL cannot even cope with returning from the OS, they spin inside -SAL on resume. The OS INIT code has workarounds for some of these -broken SAL symptoms, but some simply cannot be fixed from the OS side. - ---- - -The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer -violations. Unfortunately MCA/INIT start off as massive layer -violations (can occur at _any_ time) and they build from there. - -At least ia64 makes an attempt at recovering from hardware errors, but -it is a difficult problem because of the asynchronous nature of these -errors. When processing an unmaskable interrupt we sometimes need -special code to cope with our inability to take any locks. - ---- - -How is ia64 MCA/INIT different from x86 NMI? - -* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to - all cpus. - -* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2 - per cpu. 
- -* x86 has a separate struct task which points to one of multiple kernel - stacks. ia64 has the struct task embedded in the single kernel - stack, so switching stack means switching task. - -* x86 does not call the BIOS so the NMI handler does not have to worry - about any registers having changed. MCA/INIT can occur while the cpu - is in PAL in physical mode, with undefined registers and an undefined - kernel stack. - -* i386 backtrace is not very sensitive to whether a process is running - or not. ia64 unwind is very, very sensitive to whether a process is - running or not. - ---- - -What happens when MCA/INIT is delivered what a cpu is running user -space code? - -The user mode registers are stored in the RSE area of the MCA/INIT on -entry to the OS and are restored from there on return to SAL, so user -mode registers are preserved across a recoverable MCA/INIT. Since the -OS has no idea what unwind data is available for the user space stack, -MCA/INIT never tries to backtrace user space. Which means that the OS -does not bother making the user space process look like a blocked task, -i.e. the OS does not copy pt_regs and switch_stack to the user space -stack. Also the OS has no idea how big the user space RSE and memory -stacks are, which makes it too risky to copy the saved state to a user -mode stack. - ---- - -How do we get a backtrace on the tasks that were running when MCA/INIT -was delivered? - -mca.c:::ia64_mca_modify_original_stack(). That identifies and -verifies the original kernel stack, copies the dirty registers from -the MCA/INIT stack's RSE to the original stack's RSE, copies the -skeleton struct pt_regs and switch_stack to the original stack, fills -in the skeleton structures from the PAL minstate area and updates the -original stack's thread.ksp. That makes the original stack look -exactly like any other blocked task, i.e. it now appears to be -sleeping. 
To get a backtrace, just start with thread.ksp for the -original task and unwind like any other sleeping task. - ---- - -How do we identify the tasks that were running when MCA/INIT was -delivered? - -If the previous task has been verified and converted to a blocked -state, then sos->prev_task on the MCA/INIT stack is updated to point to -the previous task. You can look at that field in dumps or debuggers. -To help distinguish between the handler and the original tasks, -handlers have _TIF_MCA_INIT set in thread_info.flags. - -The sos data is always in the MCA/INIT handler stack, at offset -MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it -as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct -ia64_sal_os_state), with 16 byte alignment for all structures. - -Also the comm field of the MCA/INIT task is modified to include the pid -of the original task, for humans to use. For example, a comm field of -'MCA 12159' means that pid 12159 was running when the MCA was -delivered. diff --git a/Documentation/arch/ia64/serial.rst b/Documentation/arch/ia64/serial.rst deleted file mode 100644 index 1de70c305a..0000000000 --- a/Documentation/arch/ia64/serial.rst +++ /dev/null @@ -1,165 +0,0 @@ -============== -Serial Devices -============== - -Serial Device Naming -==================== - - As of 2.6.10, serial devices on ia64 are named based on the - order of ACPI and PCI enumeration. The first device in the - ACPI namespace (if any) becomes /dev/ttyS0, the second becomes - /dev/ttyS1, etc., and PCI devices are named sequentially - starting after the ACPI devices. - - Prior to 2.6.10, there were confusing exceptions to this: - - - Firmware on some machines (mostly from HP) provides an HCDP - table[1] that tells the kernel about devices that can be used - as a serial console. If the user specified "console=ttyS0" - or the EFI ConOut path contained only UART devices, the - kernel registered the device described by the HCDP as - /dev/ttyS0. 
- - - If there was no HCDP, we assumed there were UARTs at the - legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so - the kernel registered those as /dev/ttyS0 and /dev/ttyS1. - - Any additional ACPI or PCI devices were registered sequentially - after /dev/ttyS0 as they were discovered. - - With an HCDP, device names changed depending on EFI configuration - and "console=" arguments. Without an HCDP, device names didn't - change, but we registered devices that might not really exist. - - For example, an HP rx1600 with a single built-in serial port - (described in the ACPI namespace) plus an MP[2] (a PCI device) has - these ports: - - ========== ========== ============ ============ ======= - Type MMIO pre-2.6.10 pre-2.6.10 2.6.10+ - address - (EFI console (EFI console - on builtin) on MP port) - ========== ========== ============ ============ ======= - builtin 0xff5e0000 ttyS0 ttyS1 ttyS0 - MP UPS 0xf8031000 ttyS1 ttyS2 ttyS1 - MP Console 0xf8030000 ttyS2 ttyS0 ttyS2 - MP 2 0xf8030010 ttyS3 ttyS3 ttyS3 - MP 3 0xf8030038 ttyS4 ttyS4 ttyS4 - ========== ========== ============ ============ ======= - -Console Selection -================= - - EFI knows what your console devices are, but it doesn't tell the - kernel quite enough to actually locate them. The DIG64 HCDP - table[1] does tell the kernel where potential serial console - devices are, but not all firmware supplies it. Also, EFI supports - multiple simultaneous consoles and doesn't tell the kernel which - should be the "primary" one. - - So how do you tell Linux which console device to use? - - - If your firmware supplies the HCDP, it is simplest to - configure EFI with a single device (either a UART or a VGA - card) as the console. Then you don't need to tell Linux - anything; the kernel will automatically use the EFI console. - - (This works only in 2.6.6 or later; prior to that you had - to specify "console=ttyS0" to get a serial console.) 
- - - Without an HCDP, Linux defaults to a VGA console unless you - specify a "console=" argument. - - NOTE: Don't assume that a serial console device will be /dev/ttyS0. - It might be ttyS1, ttyS2, etc. Make sure you have the appropriate - entries in /etc/inittab (for getty) and /etc/securetty (to allow - root login). - -Early Serial Console -==================== - - The kernel can't start using a serial console until it knows where - the device lives. Normally this happens when the driver enumerates - all the serial devices, which can happen a minute or more after the - kernel starts booting. - - 2.6.10 and later kernels have an "early uart" driver that works - very early in the boot process. The kernel will automatically use - this if the user supplies an argument like "console=uart,io,0x3f8", - or if the EFI console path contains only a UART device and the - firmware supplies an HCDP. - -Troubleshooting Serial Console Problems -======================================= - - No kernel output after elilo prints "Uncompressing Linux... done": - - - You specified "console=ttyS0" but Linux changed the device - to which ttyS0 refers. Configure exactly one EFI console - device[3] and remove the "console=" option. - - - The EFI console path contains both a VGA device and a UART. - EFI and elilo use both, but Linux defaults to VGA. Remove - the VGA device from the EFI console path[3]. - - - Multiple UARTs selected as EFI console devices. EFI and - elilo use all selected devices, but Linux uses only one. - Make sure only one UART is selected in the EFI console - path[3]. - - - You're connected to an HP MP port[2] but have a non-MP UART - selected as EFI console device. EFI uses the MP as a - console device even when it isn't explicitly selected. - Either move the console cable to the non-MP UART, or change - the EFI console path[3] to the MP UART. - - Long pause (60+ seconds) between "Uncompressing Linux... 
done" and - start of kernel output: - - - No early console because you used "console=ttyS". Remove - the "console=" option if your firmware supplies an HCDP. - - - If you don't have an HCDP, the kernel doesn't know where - your console lives until the driver discovers serial - devices. Use "console=uart,io,0x3f8" (or appropriate - address for your machine). - - Kernel and init script output works fine, but no "login:" prompt: - - - Add getty entry to /etc/inittab for console tty. Look for - the "Adding console on ttyS" message that tells you which - device is the console. - - "login:" prompt, but can't login as root: - - - Add entry to /etc/securetty for console tty. - - No ACPI serial devices found in 2.6.17 or later: - - - Turn on CONFIG_PNP and CONFIG_PNPACPI. Prior to 2.6.17, ACPI - serial devices were discovered by 8250_acpi. In 2.6.17, - 8250_acpi was replaced by the combination of 8250_pnp and - CONFIG_PNPACPI. - - - -[1] - http://www.dig64.org/specifications/agreement - The table was originally defined as the "HCDP" for "Headless - Console/Debug Port." The current version is the "PCDP" for - "Primary Console and Debug Port Devices." - -[2] - The HP MP (management processor) is a PCI device that provides - several UARTs. One of the UARTs is often used as a console; the - EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart". - The external connection is usually a 25-pin connector, and a - special dongle converts that to three 9-pin connectors, one of - which is labelled "Console." - -[3] - EFI console devices are configured using the EFI Boot Manager - "Boot option maintenance" menu. You may have to interrupt the - boot sequence to use this menu, and you will have to reset the - box after changing console configuration. diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst index 84b80255b8..3f9962e45c 100644 --- a/Documentation/arch/index.rst +++ b/Documentation/arch/index.rst @@ -12,15 +12,14 @@ implementation. 
arc/index arm/index arm64/index - ia64/index loongarch/index m68k/index mips/index nios2/index openrisc/index parisc/index - ../powerpc/index - ../riscv/index + powerpc/index + riscv/index s390/index sh/index sparc/index diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst index 8c568cfc21..5e6db78abe 100644 --- a/Documentation/arch/loongarch/introduction.rst +++ b/Documentation/arch/loongarch/introduction.rst @@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation): Documentation of LoongArch ISA: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English) Documentation of LoongArch ELF psABI: diff --git a/Documentation/arch/powerpc/associativity.rst b/Documentation/arch/powerpc/associativity.rst new file mode 100644 index 0000000000..4d01c73685 --- /dev/null +++ b/Documentation/arch/powerpc/associativity.rst @@ -0,0 +1,105 @@ +============================ +NUMA resource associativity +============================ + +Associativity represents the groupings of the various platform resources into +domains of substantially similar mean performance relative to resources outside +of that domain. Resources subsets of a given domain that exhibit better +performance relative to each other than relative to other resources subsets +are represented as being members of a sub-grouping domain. This performance +characteristic is presented in terms of NUMA node distance within the Linux kernel. +From the platform view, these groups are also referred to as domains. 
+ +PAPR interface currently supports different ways of communicating these resource +grouping details to the OS. These are referred to as Form 0, Form 1 and Form2 +associativity grouping. Form 0 is the oldest format and is now considered deprecated. + +Hypervisor indicates the type/form of associativity used via "ibm,architecture-vec-5 property". +Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1. +A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity +bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used. + +Form 0 +------ +Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE). + +Form 1 +------ +With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity +device tree properties are used to determine the NUMA distance between resource groups/domains. + +The “ibm,associativity” property contains a list of one or more numbers (domainID) +representing the resource’s platform grouping domains. + +The “ibm,associativity-reference-points” property contains a list of one or more numbers +(domainID index) that represents the 1 based ordinal in the associativity lists. +The list of domainID indexes represents an increasing hierarchy of resource grouping. + +ex: +{ primary domainID index, secondary domainID index, tertiary domainID index.. } + +Linux kernel uses the domainID at the primary domainID index as the NUMA node id. +Linux kernel computes NUMA distance between two domains by recursively comparing +if they belong to the same higher-level domains. For mismatch at every higher +level of the resource group, the kernel doubles the NUMA distance between the +comparing domains. + +Form 2 +------- +Form 2 associativity format adds separate device tree properties representing NUMA node distance +thereby making the node distance computation flexible. Form 2 also allows flexible primary +domain numbering. 
With numa distance computation now detached from the index value in +"ibm,associativity-reference-points" property, Form 2 allows a large number of primary domain +ids at the same domainID index representing resource groups of different performance/latency +characteristics. + +Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the +"ibm,architecture-vec-5" property. + +"ibm,numa-lookup-index-table" property contains a list of one or more numbers representing +the domainIDs present in the system. The offset of the domainID in this property is +used as an index while computing numa distance information via "ibm,numa-distance-table". + +prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by +N domainID encoded as with encode-int + +For ex: +"ibm,numa-lookup-index-table" = {4, 0, 8, 250, 252}. The offset of domainID 8 (2) is used when +computing the distance of domain 8 from other domains present in the system. For the rest of +this document, this offset will be referred to as domain distance offset. + +"ibm,numa-distance-table" property contains a list of one or more numbers representing the NUMA +distance between resource groups/domains present in the system. + +prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by +N distance values encoded as with encode-bytes. The max distance value we could encode is 255. +The number N must be equal to the square of m where m is the number of domainIDs in the +numa-lookup-index-table. 
+ +For ex: +ibm,numa-lookup-index-table = <3 0 8 40>; +ibm,numa-distance-table = <9>, /bits/ 8 < 10 20 80 20 10 160 80 160 10>; + +:: + + | 0 8 40 + --|------------ + | + 0 | 10 20 80 + | + 8 | 20 10 160 + | + 40| 80 160 10 + +A possible "ibm,associativity" property for resources in node 0, 8 and 40 + +{ 3, 6, 7, 0 } +{ 3, 6, 9, 8 } +{ 3, 6, 7, 40} + +With "ibm,associativity-reference-points" { 0x3 } + +"ibm,numa-lookup-index-table" helps in having a compact representation of distance matrix. +Since domainID can be sparse, the matrix of distances can also be effectively sparse. +With "ibm,numa-lookup-index-table" we can achieve a compact representation of +distance information. diff --git a/Documentation/arch/powerpc/booting.rst b/Documentation/arch/powerpc/booting.rst new file mode 100644 index 0000000000..11aa440f98 --- /dev/null +++ b/Documentation/arch/powerpc/booting.rst @@ -0,0 +1,110 @@ +.. SPDX-License-Identifier: GPL-2.0 + +DeviceTree Booting +------------------ + +During the development of the Linux/ppc64 kernel, and more specifically, the +addition of new platform types outside of the old IBM pSeries/iSeries pair, it +was decided to enforce some strict rules regarding the kernel entry and +bootloader <-> kernel interfaces, in order to avoid the degeneration that had +become the ppc32 kernel entry point and the way a new platform should be added +to the kernel. The legacy iSeries platform breaks those rules as it predates +this scheme, but no new board support will be accepted in the main tree that +doesn't follow them properly. In addition, since the advent of the arch/powerpc +merged architecture for ppc32 and ppc64, new 32-bit platforms and 32-bit +platforms which move into arch/powerpc will be required to use these rules as +well. + +The main requirement that will be defined in more detail below is the presence +of a device-tree whose format is defined after Open Firmware specification.
+However, in order to make life easier to embedded board vendors, the kernel +doesn't require the device-tree to represent every device in the system and only +requires some nodes and properties to be present. For example, the kernel does +not require you to create a node for every PCI device in the system. It is a +requirement to have a node for PCI host bridges in order to provide interrupt +routing information and memory/IO ranges, among others. It is also recommended +to define nodes for on chip devices and other buses that don't specifically fit +in an existing OF specification. This creates a great flexibility in the way the +kernel can then probe those and match drivers to device, without having to hard +code all sorts of tables. It also makes it more flexible for board vendors to do +minor hardware upgrades without significantly impacting the kernel code or +cluttering it with special cases. + + +Entry point +~~~~~~~~~~~ + +There is one single entry point to the kernel, at the start +of the kernel image. That entry point supports two calling +conventions: + + a) Boot from Open Firmware. If your firmware is compatible + with Open Firmware (IEEE 1275) or provides an OF compatible + client interface API (support for "interpret" callback of + forth words isn't required), you can enter the kernel with: + + r5 : OF callback pointer as defined by IEEE 1275 + bindings to powerpc. Only the 32-bit client interface + is currently supported + + r3, r4 : address & length of an initrd if any or 0 + + The MMU is either on or off; the kernel will run the + trampoline located in arch/powerpc/kernel/prom_init.c to + extract the device-tree and other information from open + firmware and build a flattened device-tree as described + in b). prom_init() will then re-enter the kernel using + the second method. This trampoline code runs in the + context of the firmware, which is supposed to handle all + exceptions during that time. 
+ + b) Direct entry with a flattened device-tree block. This entry + point is called by a) after the OF trampoline and can also be + called directly by a bootloader that does not support the Open + Firmware client interface. It is also used by "kexec" to + implement "hot" booting of a new kernel from a previous + running one. This method is what I will describe in more + details in this document, as method a) is simply standard Open + Firmware, and thus should be implemented according to the + various standard documents defining it and its binding to the + PowerPC platform. The entry point definition then becomes: + + r3 : physical pointer to the device-tree block + (defined in chapter II) in RAM + + r4 : physical pointer to the kernel itself. This is + used by the assembly code to properly disable the MMU + in case you are entering the kernel with MMU enabled + and a non-1:1 mapping. + + r5 : NULL (as to differentiate with method a) + +Note about SMP entry: Either your firmware puts your other +CPUs in some sleep loop or spin loop in ROM where you can get +them out via a soft reset or some other means, in which case +you don't need to care, or you'll have to enter the kernel +with all CPUs. The way to do that with method b) will be +described in a later revision of this document. + +Board supports (platforms) are not exclusive config options. An +arbitrary set of board supports can be built in a single kernel +image. The kernel will "know" what set of functions to use for a +given platform based on the content of the device-tree. Thus, you +should: + + a) add your platform support as a _boolean_ option in + arch/powerpc/Kconfig, following the example of PPC_PSERIES, + PPC_PMAC and PPC_MAPLE. The latter is probably a good + example of a board support to start from. + + b) create your main platform file as + "arch/powerpc/platforms/myplatform/myboard_setup.c" and add it + to the Makefile under the condition of your ``CONFIG_`` + option. 
This file will define a structure of type "ppc_md" + containing the various callbacks that the generic code will + use to get to your platform specific code + +A kernel image may support multiple platforms, but only if the +platforms feature the same core architecture. A single kernel build +cannot support both configurations with Book E and configurations +with classic Powerpc architectures. diff --git a/Documentation/arch/powerpc/bootwrapper.rst b/Documentation/arch/powerpc/bootwrapper.rst new file mode 100644 index 0000000000..cdfa2bc842 --- /dev/null +++ b/Documentation/arch/powerpc/bootwrapper.rst @@ -0,0 +1,131 @@ +======================== +The PowerPC boot wrapper +======================== + +Copyright (C) Secret Lab Technologies Ltd. + +PowerPC image targets compress and wrap the kernel image (vmlinux) with +a boot wrapper to make it usable by the system firmware. There is no +standard PowerPC firmware interface, so the boot wrapper is designed to +be adaptable for each kind of image that needs to be built. + +The boot wrapper can be found in the arch/powerpc/boot/ directory. The +Makefile in that directory has targets for all the available image types. +The different image types are used to support all of the various firmware +interfaces found on PowerPC platforms. OpenFirmware is the most commonly +used firmware type on general purpose PowerPC systems from Apple, IBM and +others. U-Boot is typically found on embedded PowerPC hardware, but there +are a handful of other firmware implementations which are also popular. Each +firmware interface requires a different image format. + +The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and +it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target +image. The details of the build system are discussed in the next section.
+Currently, the following image format targets exist: + + ==================== ======================================================== + cuImage.%: Backwards compatible uImage for older version of + U-Boot (for versions that don't understand the device + tree). This image embeds a device tree blob inside + the image. The boot wrapper, kernel and device tree + are all embedded inside the U-Boot uImage file format + with boot wrapper code that extracts data from the old + bd_info structure and loads the data into the device + tree before jumping into the kernel. + + Because of the series of #ifdefs found in the + bd_info structure used in the old U-Boot interfaces, + cuImages are platform specific. Each specific + U-Boot platform has a different platform init file + which populates the embedded device tree with data + from the platform specific bd_info file. The platform + specific cuImage platform init code can be found in + `arch/powerpc/boot/cuboot.*.c`. Selection of the correct + cuImage init code for a specific board can be found in + the wrapper structure. + + dtbImage.%: Similar to zImage, except device tree blob is embedded + inside the image instead of provided by firmware. The + output image file can be either an elf file or a flat + binary depending on the platform. + + dtbImages are used on systems which do not have an + interface for passing a device tree directly. + dtbImages are similar to simpleImages except that + dtbImages have platform specific code for extracting + data from the board firmware, but simpleImages do not + talk to the firmware at all. + + PlayStation 3 support uses dtbImage. So do Embedded + Planet boards using the PlanetCore firmware. Board + specific initialization code is typically found in a + file named arch/powerpc/boot/.c; but this + can be overridden by the wrapper script. + + simpleImage.%: Firmware independent compressed image that does not + depend on any particular firmware interface and embeds + a device tree blob. 
This image is a flat binary that + can be loaded to any location in RAM and jumped to. + Firmware cannot pass any configuration data to the + kernel with this image type and it depends entirely on + the embedded device tree for all information. + + treeImage.%: Image format for use with OpenBIOS firmware found + on some ppc4xx hardware. This image embeds a device + tree blob inside the image. + + uImage: Native image format used by U-Boot. The uImage target + does not add any boot code. It just wraps a compressed + vmlinux in the uImage data structure. This image + requires a version of U-Boot that is able to pass + a device tree to the kernel at boot. If using an older + version of U-Boot, then you need to use a cuImage + instead. + + zImage.%: Image format which does not embed a device tree. + Used by OpenFirmware and other firmware interfaces + which are able to supply a device tree. This image + expects firmware to provide the device tree at boot. + Typically, if you have general purpose PowerPC + hardware then you want this image format. + ==================== ======================================================== + +Image types which embed a device tree blob (simpleImage, dtbImage, treeImage, +and cuImage) all generate the device tree blob from a file in the +arch/powerpc/boot/dts/ directory. The Makefile selects the correct device +tree source based on the name of the target. Therefore, if the kernel is +built with 'make treeImage.walnut', then the build system will use +arch/powerpc/boot/dts/walnut.dts to build treeImage.walnut. + +Two special targets called 'zImage' and 'zImage.initrd' also exist. These +targets build all the default images as selected by the kernel configuration. +Default images are selected by the boot wrapper Makefile +(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look +at the Makefile to see which default image targets are available.
+ +How it is built +--------------- +arch/powerpc is designed to support multiplatform kernels, which means +that a single vmlinux image can be booted on many different target boards. +It also means that the boot wrapper must be able to wrap for many kinds of +images on a single build. The design decision was made to not use any +conditional compilation code (#ifdef, etc) in the boot wrapper source code. +All of the boot wrapper pieces are buildable at any time regardless of the +kernel configuration. Building all the wrapper bits on every kernel build +also ensures that obscure parts of the wrapper are at the very least compile +tested in a large variety of environments. + +The wrapper is adapted for different image types at link time by linking in +just the wrapper bits that are appropriate for the image type. The 'wrapper +script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and +is responsible for selecting the correct wrapper bits for the image type. +The arguments are well documented in the script's comment block, so they +are not repeated here. However, it is worth mentioning that the script +uses the -p (platform) argument as the main method of deciding which wrapper +bits to compile in. Look for the large 'case "$platform" in' block in the +middle of the script. This is also the place where platform specific fixups +can be selected by changing the link order. + +In particular, care should be taken when working with cuImages. cuImage +wrapper bits are very board specific and care should be taken to make sure +the target you are trying to build is supported by the wrapper bits. 
diff --git a/Documentation/arch/powerpc/cpu_families.rst b/Documentation/arch/powerpc/cpu_families.rst new file mode 100644 index 0000000000..eb7e60649b --- /dev/null +++ b/Documentation/arch/powerpc/cpu_families.rst @@ -0,0 +1,237 @@ +============ +CPU Families +============ + +This document tries to summarise some of the different cpu families that exist +and are supported by arch/powerpc. + + +Book3S (aka sPAPR) +------------------ + +- Hash MMU (except 603 and e300) +- Radix MMU (POWER9 and later) +- Software loaded TLB (603 and e300) +- Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600) +- Mix of 32 & 64 bit:: + + +--------------+ +----------------+ + | Old POWER | --------------> | RS64 (threads) | + +--------------+ +----------------+ + | + | + v + +--------------+ +----------------+ +------+ + | 601 | --------------> | 603 | ---> | e300 | + +--------------+ +----------------+ +------+ + | | + | | + v v + +--------------+ +-----+ +----------------+ +-------+ + | 604 | | 755 | <--- | 750 (G3) | ---> | 750CX | + +--------------+ +-----+ +----------------+ +-------+ + | | | + | | | + v v v + +--------------+ +----------------+ +-------+ + | 620 (64 bit) | | 7400 | | 750CL | + +--------------+ +----------------+ +-------+ + | | | + | | | + v v v + +--------------+ +----------------+ +-------+ + | POWER3/630 | | 7410 | | 750FX | + +--------------+ +----------------+ +-------+ + | | + | | + v v + +--------------+ +----------------+ + | POWER3+ | | 7450 | + +--------------+ +----------------+ + | | + | | + v v + +--------------+ +----------------+ + | POWER4 | | 7455 | + +--------------+ +----------------+ + | | + | | + v v + +--------------+ +-------+ +----------------+ + | POWER4+ | --> | 970 | | 7447 | + +--------------+ +-------+ +----------------+ + | | | + | | | + v v v + +--------------+ +-------+ +----------------+ + | POWER5 | | 970FX | | 7448 | + +--------------+ +-------+ +----------------+ + | | | + | | | + v v v + +--------------+ 
+-------+ +----------------+ + | POWER5+ | | 970MP | | e600 | + +--------------+ +-------+ +----------------+ + | + | + v + +--------------+ + | POWER5++ | + +--------------+ + | + | + v + +--------------+ +-------+ + | POWER6 | <-?-> | Cell | + +--------------+ +-------+ + | + | + v + +--------------+ + | POWER7 | + +--------------+ + | + | + v + +--------------+ + | POWER7+ | + +--------------+ + | + | + v + +--------------+ + | POWER8 | + +--------------+ + | + | + v + +--------------+ + | POWER9 | + +--------------+ + | + | + v + +--------------+ + | POWER10 | + +--------------+ + + + +---------------+ + | PA6T (64 bit) | + +---------------+ + + +IBM BookE +--------- + +- Software loaded TLB. +- All 32 bit:: + + +--------------+ + | 401 | + +--------------+ + | + | + v + +--------------+ + | 403 | + +--------------+ + | + | + v + +--------------+ + | 405 | + +--------------+ + | + | + v + +--------------+ + | 440 | + +--------------+ + | + | + v + +--------------+ +----------------+ + | 450 | --> | BG/P | + +--------------+ +----------------+ + | + | + v + +--------------+ + | 460 | + +--------------+ + | + | + v + +--------------+ + | 476 | + +--------------+ + + +Motorola/Freescale 8xx +---------------------- + +- Software loaded with hardware assist. +- All 32 bit:: + + +-------------+ + | MPC8xx Core | + +-------------+ + + +Freescale BookE +--------------- + +- Software loaded TLB. +- e6500 adds HW loaded indirect TLB entries. 
+- Mix of 32 & 64 bit:: + + +--------------+ + | e200 | + +--------------+ + + + +--------------------------------+ + | e500 | + +--------------------------------+ + | + | + v + +--------------------------------+ + | e500v2 | + +--------------------------------+ + | + | + v + +--------------------------------+ + | e500mc (Book3e) | + +--------------------------------+ + | + | + v + +--------------------------------+ + | e5500 (64 bit) | + +--------------------------------+ + | + | + v + +--------------------------------+ + | e6500 (HW TLB) (Multithreaded) | + +--------------------------------+ + + +IBM A2 core +----------- + +- Book3E, software loaded TLB + HW loaded indirect TLB entries. +- 64 bit:: + + +--------------+ +----------------+ + | A2 core | --> | WSP | + +--------------+ +----------------+ + | + | + v + +--------------+ + | BG/Q | + +--------------+ diff --git a/Documentation/arch/powerpc/cpu_features.rst b/Documentation/arch/powerpc/cpu_features.rst new file mode 100644 index 0000000000..b7bcdd2f41 --- /dev/null +++ b/Documentation/arch/powerpc/cpu_features.rst @@ -0,0 +1,60 @@ +============ +CPU Features +============ + +Hollis Blanchard +5 Jun 2002 + +This document describes the system (including self-modifying code) used in the +PPC Linux kernel to support a variety of PowerPC CPUs without requiring +compile-time selection. + +Early in the boot process the ppc32 kernel detects the current CPU type and +chooses a set of features accordingly. Some examples include Altivec support, +split instruction and data caches, and if the CPU supports the DOZE and NAP +sleep modes. + +Detection of the feature set is simple. A list of processors can be found in +arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with +each value in the list. If a match is found, the cpu_features of cur_cpu_spec +is assigned to the feature bitmask for this processor and a __setup_cpu +function is called. 
+ +C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a +particular feature bit. This is done in quite a few places, for example +in ppc_setup_l2cr(). + +Implementing cpufeatures in assembly is a little more involved. There are +several paths that are performance-critical and would suffer if an array +index, structure dereference, and conditional branch were added. To avoid the +performance penalty but still allow for runtime (rather than compile-time) CPU +selection, unused code is replaced by 'nop' instructions. This nop'ing is +based on CPU 0's capabilities, so a multi-processor system with non-identical +processors will not work (but such a system would likely have other problems +anyways). + +After detecting the processor type, the kernel patches out sections of code +that shouldn't be used by writing nop's over it. Using cpufeatures requires +just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S +transfer_to_handler:: + + #ifdef CONFIG_ALTIVEC + BEGIN_FTR_SECTION + mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */ + stw r22,THREAD_VRSAVE(r23) + END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + #endif /* CONFIG_ALTIVEC */ + +If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both +instructions are replaced with nop's. + +The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET +and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in +cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros +should be used in the majority of cases. + +The END_FTR_SECTION macros are implemented by storing information about this +code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups +(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in +__ftr_fixup, and if the required feature is not present it will loop writing +nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION. 
diff --git a/Documentation/arch/powerpc/cxl.rst b/Documentation/arch/powerpc/cxl.rst new file mode 100644 index 0000000000..d2d7705761 --- /dev/null +++ b/Documentation/arch/powerpc/cxl.rst @@ -0,0 +1,469 @@ +==================================== +Coherent Accelerator Interface (CXL) +==================================== + +Introduction +============ + + The coherent accelerator interface is designed to allow the + coherent connection of accelerators (FPGAs and other devices) to a + POWER system. These devices need to adhere to the Coherent + Accelerator Interface Architecture (CAIA). + + IBM refers to this as the Coherent Accelerator Processor Interface + or CAPI. In the kernel it's referred to by the name CXL to avoid + confusion with the ISDN CAPI subsystem. + + Coherent in this context means that the accelerator and CPUs can + both access system memory directly and with the same effective + addresses. + + +Hardware overview +================= + + :: + + POWER8/9 FPGA + +----------+ +---------+ + | | | | + | CPU | | AFU | + | | | | + | | | | + | | | | + +----------+ +---------+ + | PHB | | | + | +------+ | PSL | + | | CAPP |<------>| | + +---+------+ PCIE +---------+ + + The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP) + unit which is part of the PCIe Host Bridge (PHB). This is managed + by Linux by calls into OPAL. Linux doesn't directly program the + CAPP. + + The FPGA (or coherently attached device) consists of two parts. + The POWER Service Layer (PSL) and the Accelerator Function Unit + (AFU). The AFU is used to implement specific functionality behind + the PSL. The PSL, among other things, provides memory address + translation services to allow each AFU direct access to userspace + memory. + + The AFU is the core part of the accelerator (eg. the compression, + crypto etc function). The kernel has no knowledge of the function + of the AFU. Only userspace interacts directly with the AFU. 
+ + The PSL provides the translation and interrupt services that the + AFU needs. This is what the kernel interacts with. For example, if + the AFU needs to read a particular effective address, it sends + that address to the PSL, the PSL then translates it, fetches the + data from memory and returns it to the AFU. If the PSL has a + translation miss, it interrupts the kernel and the kernel services + the fault. The context in which this fault is serviced is based on + who owns that acceleration function. + + - POWER8 and PSL Version 8 are compliant with the CAIA Version 1.0. + - POWER9 and PSL Version 9 are compliant with the CAIA Version 2.0. + + This PSL Version 9 provides new features such as: + + * Interaction with the nest MMU on the P9 chip. + * Native DMA support. + * Supports sending ASB_Notify messages for host thread wakeup. + * Supports Atomic operations. + * etc. + + Cards with a PSL9 won't work on a POWER8 system and cards with a + PSL8 won't work on a POWER9 system. + +AFU Modes +========= + + There are two programming modes supported by the AFU. Dedicated + and AFU directed. AFU may support one or both modes. + + When using dedicated mode only one MMU context is supported. In + this mode, only one userspace process can use the accelerator at a + time. + + When using AFU directed mode, up to 16K simultaneous contexts can + be supported. This means up to 16K simultaneous userspace + applications may use the accelerator (although specific AFUs may + support fewer). In this mode, the AFU sends a 16 bit context ID + with each of its requests. This tells the PSL which context is + associated with each operation. If the PSL can't translate an + operation, the ID can also be accessed by the kernel so it can + determine the userspace context associated with an operation. + + +MMIO space +========== + + A portion of the accelerator MMIO space can be directly mapped + from the AFU to userspace. Either the whole space can be mapped or + just a per context portion. 
The hardware is self describing, hence + the kernel can determine the offset and size of the per context + portion. + + +Interrupts +========== + + AFUs may generate interrupts that are destined for userspace. These + are received by the kernel as hardware interrupts and passed onto + userspace by a read syscall documented below. + + Data storage faults and error interrupts are handled by the kernel + driver. + + +Work Element Descriptor (WED) +============================= + + The WED is a 64-bit parameter passed to the AFU when a context is + started. Its format is up to the AFU hence the kernel has no + knowledge of what it represents. Typically it will be the + effective address of a work queue or status block where the AFU + and userspace can share control and status information. + + + + +User API +======== + +1. AFU character devices +^^^^^^^^^^^^^^^^^^^^^^^^ + + For AFUs operating in AFU directed mode, two character device + files will be created. /dev/cxl/afu0.0m will correspond to a + master context and /dev/cxl/afu0.0s will correspond to a slave + context. Master contexts have access to the full MMIO space an + AFU provides. Slave contexts have access to only the per process + MMIO space an AFU provides. + + For AFUs operating in dedicated process mode, the driver will + only create a single character device per AFU called + /dev/cxl/afu0.0d. This will have access to the entire MMIO space + that the AFU provides (like master contexts in AFU directed). + + The types described below are defined in include/uapi/misc/cxl.h + + The following file operations are supported on both slave and + master devices. + + A userspace library libcxl is available here: + + https://github.com/ibm-capi/libcxl + + This provides a C interface to this kernel API. + +open +---- + + Opens the device and allocates a file descriptor to be used with + the rest of the API. + + A dedicated mode AFU only has one context and only allows the + device to be opened once. 
+ + An AFU directed mode AFU can have many contexts, the device can be + opened once for each context that is available. + + When all available contexts are allocated the open call will fail + and return -ENOSPC. + + Note: + IRQs need to be allocated for each context, which may limit + the number of contexts that can be created, and therefore + how many times the device can be opened. The POWER8 CAPP + supports 2040 IRQs and 3 are used by the kernel, so 2037 are + left. If 1 IRQ is needed per context, then only 2037 + contexts can be allocated. If 4 IRQs are needed per context, + then only 2037/4 = 509 contexts can be allocated. + + +ioctl +----- + + CXL_IOCTL_START_WORK: + Starts the AFU context and associates it with the current + process. Once this ioctl is successfully executed, all memory + mapped into this process is accessible to this AFU context + using the same effective addresses. No additional calls are + required to map/unmap memory. The AFU memory context will be + updated as userspace allocates and frees memory. This ioctl + returns once the AFU context is started. + + Takes a pointer to a struct cxl_ioctl_start_work + + :: + + struct cxl_ioctl_start_work { + __u64 flags; + __u64 work_element_descriptor; + __u64 amr; + __s16 num_interrupts; + __s16 reserved1; + __s32 reserved2; + __u64 reserved3; + __u64 reserved4; + __u64 reserved5; + __u64 reserved6; + }; + + flags: + Indicates which optional fields in the structure are + valid. + + work_element_descriptor: + The Work Element Descriptor (WED) is a 64-bit argument + defined by the AFU. Typically this is an effective + address pointing to an AFU specific structure + describing what work to perform. + + amr: + Authority Mask Register (AMR), same as the powerpc + AMR. This field is only used by the kernel when the + corresponding CXL_START_WORK_AMR value is specified in + flags. If not specified the kernel will use a default + value of 0. + + num_interrupts: + Number of userspace interrupts to request. 
This field + is only used by the kernel when the corresponding + CXL_START_WORK_NUM_IRQS value is specified in flags. + If not specified the minimum number required by the + AFU will be allocated. The min and max number can be + obtained from sysfs. + + reserved fields: + For ABI padding and future extensions + + CXL_IOCTL_GET_PROCESS_ELEMENT: + Get the current context id, also known as the process element. + The value is returned from the kernel as a __u32. + + +mmap +---- + + An AFU may have an MMIO space to facilitate communication with the + AFU. If it does, the MMIO space can be accessed via mmap. The size + and contents of this area are specific to the particular AFU. The + size can be discovered via sysfs. + + In AFU directed mode, master contexts are allowed to map all of + the MMIO space and slave contexts are allowed to only map the per + process MMIO space associated with the context. In dedicated + process mode the entire MMIO space can always be mapped. + + This mmap call must be done after the START_WORK ioctl. + + Care should be taken when accessing MMIO space. Only 32 and 64-bit + accesses are supported by POWER8. Also, the AFU will be designed + with a specific endianness, so all MMIO accesses should consider + endianness (recommend endian(3) variants like: le64toh(), + be64toh() etc). These endian issues equally apply to shared memory + queues the WED may describe. + + +read +---- + + Reads events from the AFU. Blocks if no events are pending + (unless O_NONBLOCK is supplied). Returns -EIO in the case of an + unrecoverable error or if the card is removed. + + read() will always return an integral number of events. + + The buffer passed to read() must be at least 4K bytes. 
+ + The result of the read will be a buffer of one or more events, + each event is of type struct cxl_event, of varying size:: + + struct cxl_event { + struct cxl_event_header header; + union { + struct cxl_event_afu_interrupt irq; + struct cxl_event_data_storage fault; + struct cxl_event_afu_error afu_error; + }; + }; + + The struct cxl_event_header is defined as + + :: + + struct cxl_event_header { + __u16 type; + __u16 size; + __u16 process_element; + __u16 reserved1; + }; + + type: + This defines the type of event. The type determines how + the rest of the event is structured. These types are + described below and defined by enum cxl_event_type. + + size: + This is the size of the event in bytes including the + struct cxl_event_header. The start of the next event can + be found at this offset from the start of the current + event. + + process_element: + Context ID of the event. + + reserved field: + For future extensions and padding. + + If the event type is CXL_EVENT_AFU_INTERRUPT then the event + structure is defined as + + :: + + struct cxl_event_afu_interrupt { + __u16 flags; + __u16 irq; /* Raised AFU interrupt number */ + __u32 reserved1; + }; + + flags: + These flags indicate which optional fields are present + in this struct. Currently all fields are mandatory. + + irq: + The IRQ number sent by the AFU. + + reserved field: + For future extensions and padding. + + If the event type is CXL_EVENT_DATA_STORAGE then the event + structure is defined as + + :: + + struct cxl_event_data_storage { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 addr; + __u64 dsisr; + __u64 reserved3; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are mandatory. + + address: + The address that the AFU unsuccessfully attempted to + access. Valid accesses will be handled transparently by the + kernel but invalid accesses will generate this event. 
+ + dsisr: + This field gives information on the type of fault. It is a + copy of the DSISR from the PSL hardware when the address + fault occurred. The form of the DSISR is as defined in the + CAIA. + + reserved fields: + For future extensions + + If the event type is CXL_EVENT_AFU_ERROR then the event structure + is defined as + + :: + + struct cxl_event_afu_error { + __u16 flags; + __u16 reserved1; + __u32 reserved2; + __u64 error; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are Mandatory. + + error: + Error status from the AFU. Defined by the AFU. + + reserved fields: + For future extensions and padding + + +2. Card character device (powerVM guest only) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + In a powerVM guest, an extra character device is created for the + card. The device is only used to write (flash) a new image on the + FPGA accelerator. Once the image is written and verified, the + device tree is updated and the card is reset to reload the updated + image. + +open +---- + + Opens the device and allocates a file descriptor to be used with + the rest of the API. The device can only be opened once. + +ioctl +----- + +CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE: + Starts and controls flashing a new FPGA image. Partial + reconfiguration is not supported (yet), so the image must contain + a copy of the PSL and AFU(s). Since an image can be quite large, + the caller may have to iterate, splitting the image in smaller + chunks. + + Takes a pointer to a struct cxl_adapter_image:: + + struct cxl_adapter_image { + __u64 flags; + __u64 data; + __u64 len_data; + __u64 len_image; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are mandatory. + + data: + Pointer to a buffer with part of the image to write to the + card. 
+ + len_data: + Size of the buffer pointed to by data. + + len_image: + Full size of the image. + + +Sysfs Class +=========== + + A cxl sysfs class is added under /sys/class/cxl to facilitate + enumeration and tuning of the accelerators. Its layout is + described in Documentation/ABI/testing/sysfs-class-cxl + + +Udev rules +========== + + The following udev rules could be used to create a symlink to the + most logical chardev to use in any programming mode (afuX.Yd for + dedicated, afuX.Ys for afu directed), since the API is virtually + identical for each:: + + SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b" + SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \ + KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b" diff --git a/Documentation/arch/powerpc/cxlflash.rst b/Documentation/arch/powerpc/cxlflash.rst new file mode 100644 index 0000000000..e8f488acfa --- /dev/null +++ b/Documentation/arch/powerpc/cxlflash.rst @@ -0,0 +1,433 @@ +================================ +Coherent Accelerator (CXL) Flash +================================ + +Introduction +============ + + The IBM Power architecture provides support for CAPI (Coherent + Accelerator Processor Interface), which is available to certain PCIe slots + on Power 8 systems. CAPI can be thought of as a special tunneling + protocol through PCIe that allows PCIe adapters to look like special + purpose co-processors which can read or write an application's + memory and generate page faults. As a result, the host interface to + an adapter running in CAPI mode does not require the data buffers to + be mapped to the device's memory (IOMMU bypass) nor does it require + memory to be pinned. + + On Linux, Coherent Accelerator (CXL) kernel services present CAPI + devices as a PCI device by implementing a virtual PCI host bridge. + This abstraction simplifies the infrastructure and programming + model, allowing for drivers to look similar to other native PCI + device drivers. 
+ + CXL provides a mechanism by which user space applications can + directly talk to a device (network or storage) bypassing the typical + kernel/device driver stack. The CXL Flash Adapter Driver enables a + user space application direct access to Flash storage. + + The CXL Flash Adapter Driver is a kernel module that sits in the + SCSI stack as a low level device driver (below the SCSI disk and + protocol drivers) for the IBM CXL Flash Adapter. This driver is + responsible for the initialization of the adapter, setting up the + special path for user space access, and performing error recovery. It + communicates directly with the Flash Accelerator Functional Unit (AFU) + as described in Documentation/arch/powerpc/cxl.rst. + + The cxlflash driver supports two, mutually exclusive, modes of + operation at the device (LUN) level: + + - Any flash device (LUN) can be configured to be accessed as a + regular disk device (e.g. /dev/sdc). This is the default mode. + + - Any flash device (LUN) can be configured to be accessed from + user space with a special block library. This mode further + specifies the means of accessing the device and provides for + either raw access to the entire LUN (referred to as direct + or physical LUN access) or access to a kernel/AFU-mediated + partition of the LUN (referred to as virtual LUN access). The + segmentation of a disk device into virtual LUNs is assisted + by special translation services provided by the Flash AFU. + +Overview +======== + + The Coherent Accelerator Interface Architecture (CAIA) introduces a + concept of a master context. A master typically has special privileges + granted to it by the kernel or hypervisor allowing it to perform AFU + wide management and control. The master may or may not be involved + directly in each user I/O, but at the minimum is involved in the + initial setup before the user application is allowed to send requests + directly to the AFU. 
+ + The CXL Flash Adapter Driver establishes a master context with the + AFU. It uses memory mapped I/O (MMIO) for this control and setup. The + Adapter Problem Space Memory Map looks like this:: + + +-------------------------------+ + | 512 * 64 KB User MMIO | + | (per context) | + | User Accessible | + +-------------------------------+ + | 512 * 128 B per context | + | Provisioning and Control | + | Trusted Process accessible | + +-------------------------------+ + | 64 KB Global | + | Trusted Process accessible | + +-------------------------------+ + + This driver configures itself into the SCSI software stack as an + adapter driver. The driver is the only entity that is considered a + Trusted Process to program the Provisioning and Control and Global + areas in the MMIO Space shown above. The master context driver + discovers all LUNs attached to the CXL Flash adapter and instantiates + scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN + seen from each path. + + Once these scsi block devices are instantiated, an application + written to a specification provided by the block library may get + access to the Flash from user space (without requiring a system call). + + This master context driver also provides a series of ioctls for this + block library to enable this user space access. The driver supports + two modes for accessing the block device. + + The first mode is called a virtual mode. In this mode a single scsi + block device (/dev/sdb) may be carved up into any number of distinct + virtual LUNs. The virtual LUNs may be resized as long as the sum of + the sizes of all the virtual LUNs, along with the meta-data associated + with it does not exceed the physical capacity. + + The second mode is called the physical mode. In this mode a single + block device (/dev/sdb) may be opened directly by the block library + and the entire space for the LUN is available to the application. + + Only the physical mode provides persistence of the data. i.e. 
The + data written to the block device will survive application exit and + restart and also reboot. The virtual LUNs do not persist (i.e. do + not survive after the application terminates or the system reboots). + + +Block library API +================= + + Applications intending to get access to the CXL Flash from user + space should use the block library, as it abstracts the details of + interfacing directly with the cxlflash driver that are necessary for + performing administrative actions (i.e.: setup, tear down, resize). + The block library can be thought of as a 'user' of services, + implemented as IOCTLs, that are provided by the cxlflash driver + specifically for devices (LUNs) operating in user space access + mode. While it is not a requirement that applications understand + the interface between the block library and the cxlflash driver, + a high-level overview of each supported service (IOCTL) is provided + below. + + The block library can be found on GitHub: + http://github.com/open-power/capiflash + + +CXL Flash Driver LUN IOCTLs +=========================== + + Users, such as the block library, that wish to interface with a flash + device (LUN) via user space access need to use the services provided + by the cxlflash driver. As these services are implemented as ioctls, + a file descriptor handle must first be obtained in order to establish + the communication channel between a user and the kernel. This file + descriptor is obtained by opening the device special file associated + with the scsi disk device (/dev/sdb) that was created during LUN + discovery. As per the location of the cxlflash driver within the + SCSI protocol stack, this open is actually not seen by the cxlflash + driver. Upon successful open, the user receives a file descriptor + (herein referred to as fd1) that should be used for issuing the + subsequent ioctls listed below. 
+ + The structure definitions for these IOCTLs are available in: + uapi/scsi/cxlflash_ioctl.h + +DK_CXLFLASH_ATTACH +------------------ + + This ioctl obtains, initializes, and starts a context using the CXL + kernel services. These services specify a context id (u16) by which + to uniquely identify the context and its allocated resources. The + services additionally provide a second file descriptor (herein + referred to as fd2) that is used by the block library to initiate + memory mapped I/O (via mmap()) to the CXL flash device and poll for + completion events. This file descriptor is intentionally installed by + this driver and not the CXL kernel services to allow for intermediary + notification and access in the event of a non-user-initiated close(), + such as a killed process. This design point is described in further + detail in the description for the DK_CXLFLASH_DETACH ioctl. + + There are a few important aspects regarding the "tokens" (context id + and fd2) that are provided back to the user: + + - These tokens are only valid for the process under which they + were created. The child of a forked process cannot continue + to use the context id or file descriptor created by its parent + (see DK_CXLFLASH_VLUN_CLONE for further details). + + - These tokens are only valid for the lifetime of the context and + the process under which they were created. Once either is + destroyed, the tokens are to be considered stale and subsequent + usage will result in errors. + + - A valid adapter file descriptor (fd2 >= 0) is only returned on + the initial attach for a context. Subsequent attaches to an + existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present) + do not provide the adapter file descriptor as it was previously + made known to the application. + + - When a context is no longer needed, the user shall detach from + the context via the DK_CXLFLASH_DETACH ioctl. 
When this ioctl + returns with a valid adapter file descriptor and the return flag + DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ + close the adapter file descriptor following a successful detach. + + - When this ioctl returns with a valid fd2 and the return flag + DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ + close fd2 in the following circumstances: + + + Following a successful detach of the last user of the context + + Following a successful recovery on the context's original fd2 + + In the child process of a fork(), following a clone ioctl, + on the fd2 associated with the source context + + - At any time, a close on fd2 will invalidate the tokens. Applications + should exercise caution to only close fd2 when appropriate (outlined + in the previous bullet) to avoid premature loss of I/O. + +DK_CXLFLASH_USER_DIRECT +----------------------- + This ioctl is responsible for transitioning the LUN to direct + (physical) mode access and configuring the AFU for direct access from + user space on a per-context basis. Additionally, the block size and + last logical block address (LBA) are returned to the user. + + As mentioned previously, when operating in user space access mode, + LUNs may be accessed in whole or in part. Only one mode is allowed + at a time and if one mode is active (outstanding references exist), + requests to use the LUN in a different mode are denied. + + The AFU is configured for direct access from user space by adding an + entry to the AFU's resource handle table. The index of the entry is + treated as a resource handle that is returned to the user. The user + is then able to use the handle to reference the LUN during I/O. + +DK_CXLFLASH_USER_VIRTUAL +------------------------ + This ioctl is responsible for transitioning the LUN to virtual mode + of access and configuring the AFU for virtual access from user space + on a per-context basis. 
Additionally, the block size and last logical + block address (LBA) are returned to the user. + + As mentioned previously, when operating in user space access mode, + LUNs may be accessed in whole or in part. Only one mode is allowed + at a time and if one mode is active (outstanding references exist), + requests to use the LUN in a different mode are denied. + + The AFU is configured for virtual access from user space by adding + an entry to the AFU's resource handle table. The index of the entry + is treated as a resource handle that is returned to the user. The + user is then able to use the handle to reference the LUN during I/O. + + By default, the virtual LUN is created with a size of 0. The user + would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to grow + the virtual LUN to a desired size. To avoid having to perform this + resize for the initial creation of the virtual LUN, the user has the + option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL + ioctl, such that when success is returned to the user, the + resource handle that is provided is already referencing provisioned + storage. This is reflected by the last LBA being a non-zero value. + + When a LUN is accessible from more than one port, this ioctl will + return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This + provides the user with a hint that I/O can be retried in the event + of an I/O error as the LUN can be reached over multiple paths. + +DK_CXLFLASH_VLUN_RESIZE +----------------------- + This ioctl is responsible for resizing a previously created virtual + LUN and will fail if invoked upon a LUN that is not in virtual + mode. Upon success, an updated last LBA is returned to the user + indicating the new size of the virtual LUN associated with the + resource handle. + + The partitioning of virtual LUNs is jointly mediated by the cxlflash + driver and the AFU. 
An allocation table is kept for each LUN that is + operating in the virtual mode and used to program a LUN translation + table that the AFU references when provided with a resource handle. + + This ioctl can return -EAGAIN if an AFU sync operation takes too long. + In addition to returning a failure to the user, cxlflash will also schedule + an asynchronous AFU reset. Should the user choose to retry the operation, + it is expected to succeed. If this ioctl fails with -EAGAIN, the user + can either retry the operation or treat it as a failure. + +DK_CXLFLASH_RELEASE +------------------- + This ioctl is responsible for releasing a previously obtained + reference to either a physical or virtual LUN. This can be + thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or + DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle + is no longer valid and the entry in the resource handle table is + made available to be used again. + + As part of the release process for virtual LUNs, the virtual LUN + is first resized to 0 to clear out and free the translation tables + associated with the virtual LUN reference. + +DK_CXLFLASH_DETACH +------------------ + This ioctl is responsible for unregistering a context with the + cxlflash driver and releasing outstanding resources that were + not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon + success, all "tokens" which had been provided to the user from the + DK_CXLFLASH_ATTACH onward are no longer valid. + + When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful + attach, the application _must_ close the fd2 associated with the context + following the detach of the final user of the context. + +DK_CXLFLASH_VLUN_CLONE +---------------------- + This ioctl is responsible for cloning a previously created + context to a more recently created context. It exists solely to + support maintaining user space access to storage after a process + forks. 
Upon success, the child process (which invoked the ioctl) + will have access to the same LUNs via the same resource handle(s) + as the parent, but under a different context. + + Context sharing across processes is not supported with CXL and + therefore each fork must be met with establishing a new context + for the child process. This ioctl simplifies the state management + and playback required by a user in such a scenario. When a process + forks, the child process can clone the parent's context by first creating + a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to + perform the clone from the parent to the child. + + The clone itself is fairly simple. The resource handle and LUN + translation tables are copied from the parent context to the child's + and then synced with the AFU. + + When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful + attach, the application _must_ close the fd2 associated with the source + context (still resident/accessible in the parent process) following the + clone. This is to avoid a stale entry in the file descriptor table of the + child process. + + This ioctl can return -EAGAIN if an AFU sync operation takes too long. + In addition to returning a failure to the user, cxlflash will also schedule + an asynchronous AFU reset. Should the user choose to retry the operation, + it is expected to succeed. If this ioctl fails with -EAGAIN, the user + can either retry the operation or treat it as a failure. + +DK_CXLFLASH_VERIFY +------------------ + This ioctl is used to detect various changes such as the capacity of + the disk changing, the number of LUNs visible changing, etc. In cases + where the changes affect the application (such as a LUN resize), the + cxlflash driver will report the changed state to the application. + + The user calls in when they want to validate that a LUN hasn't been + changed in response to a check condition. 
As the user is operating out + of band from the kernel, they will see these types of events without + the kernel's knowledge. When encountered, the user's architected + behavior is to call in to this ioctl, indicating what they want to + verify and passing along any appropriate information. For now, only + verifying a LUN change (ie: size different) with sense data is + supported. + +DK_CXLFLASH_RECOVER_AFU +----------------------- + This ioctl is used to drive recovery (if such an action is warranted) + of a specified user context. Any state associated with the user context + is re-established upon successful recovery. + + User contexts are put into an error condition when the device needs to + be reset or is terminating. Users are notified of this error condition + by seeing all 0xF's on an MMIO read. Upon encountering this, the + architected behavior for a user is to call into this ioctl to recover + their context. A user may also call into this ioctl at any time to + check if the device is operating normally. If a failure is returned + from this ioctl, the user is expected to gracefully clean up their + context via release/detach ioctls. Until they do, the context they + hold is not relinquished. The user may also optionally exit the process + at which time the context/resources they held will be freed as part of + the release fop. + + When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful + attach, the application _must_ unmap and close the fd2 associated with the + original context following this ioctl returning success and indicating that + the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET). + +DK_CXLFLASH_MANAGE_LUN +---------------------- + This ioctl is used to switch a LUN from a mode where it is available + for file-system access (legacy), to a mode where it is set aside for + exclusive user space access (superpipe). 
In case a LUN is visible + across multiple ports and adapters, this ioctl is used to uniquely + identify each LUN by its World Wide Node Name (WWNN). + + +CXL Flash Driver Host IOCTLs +============================ + + Each host adapter instance that is supported by the cxlflash driver + has a special character device associated with it to enable a set of + host management function. These character devices are hosted in a + class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`. + + Applications can be written to perform various functions using the + host ioctl APIs below. + + The structure definitions for these IOCTLs are available in: + uapi/scsi/cxlflash_ioctl.h + +HT_CXLFLASH_LUN_PROVISION +------------------------- + This ioctl is used to create and delete persistent LUNs on cxlflash + devices that lack an external LUN management interface. It is only + valid when used with AFUs that support the LUN provision capability. + + When sufficient space is available, LUNs can be created by specifying + the target port to host the LUN and a desired size in 4K blocks. Upon + success, the LUN ID and WWID of the created LUN will be returned and + the SCSI bus can be scanned to detect the change in LUN topology. Note + that partial allocations are not supported. Should a creation fail due + to a space issue, the target port can be queried for its current LUN + geometry. + + To remove a LUN, the device must first be disassociated from the Linux + SCSI subsystem. The LUN deletion can then be initiated by specifying a + target port and LUN ID. Upon success, the LUN geometry associated with + the port will be updated to reflect new number of provisioned LUNs and + available capacity. 
+ + To query the LUN geometry of a port, the target port is specified and + upon success, the following information is presented: + + - Maximum number of provisioned LUNs allowed for the port + - Current number of provisioned LUNs for the port + - Maximum total capacity of provisioned LUNs for the port (4K blocks) + - Current total capacity of provisioned LUNs for the port (4K blocks) + + With this information, the number of available LUNs and capacity can be + can be calculated. + +HT_CXLFLASH_AFU_DEBUG +--------------------- + This ioctl is used to debug AFUs by supporting a command pass-through + interface. It is only valid when used with AFUs that support the AFU + debug capability. + + With exception of buffer management, AFU debug commands are opaque to + cxlflash and treated as pass-through. For debug commands that do require + data transfer, the user supplies an adequately sized data buffer and must + specify the data transfer direction with respect to the host. There is a + maximum transfer size of 256K imposed. Note that partial read completions + are not supported - when errors are experienced with a host read data + transfer, the data buffer is not copied back to the user. diff --git a/Documentation/arch/powerpc/dawr-power9.rst b/Documentation/arch/powerpc/dawr-power9.rst new file mode 100644 index 0000000000..310f2e0cea --- /dev/null +++ b/Documentation/arch/powerpc/dawr-power9.rst @@ -0,0 +1,101 @@ +===================== +DAWR issues on POWER9 +===================== + +On older POWER9 processors, the Data Address Watchpoint Register (DAWR) can +cause a checkstop if it points to cache inhibited (CI) memory. Currently Linux +has no way to distinguish CI memory when configuring the DAWR, so on affected +systems, the DAWR is disabled. + +Affected processor revisions +============================ + +This issue is only present on processors prior to v2.3. 
The revision can be +found in /proc/cpuinfo:: + + processor : 0 + cpu : POWER9, altivec supported + clock : 3800.000000MHz + revision : 2.3 (pvr 004e 1203) + +On a system with the issue, the DAWR is disabled as detailed below. + +Technical Details: +================== + +DAWR has 6 different ways of being set. +1) ptrace +2) h_set_mode(DAWR) +3) h_set_dabr() +4) kvmppc_set_one_reg() +5) xmon + +For ptrace, we now advertise zero breakpoints on POWER9 via the +PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to +software emulation of the watchpoint (which is slow). + +h_set_mode(DAWR) and h_set_dabr() will now return an error to the +guest on a POWER9 host. Current Linux guests ignore this error, so +they will silently not get the DAWR. + +kvmppc_set_one_reg() will store the value in the vcpu but won't +actually set it on POWER9 hardware. This is done so we don't break +migration from POWER8 to POWER9, at the cost of silently losing the +DAWR on the migration. + +For xmon, the 'bd' command will return an error on P9. + +Consequences for users +====================== + +For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB +will accept the command. Unfortunately since there is no hardware +support for the watchpoint, GDB will software emulate the watchpoint +making it run very slowly. + +The same will also be true for any guests started on a POWER9 +host. The watchpoint will fail and GDB will fall back to software +emulation. + +If a guest is started on a POWER8 host, GDB will accept the watchpoint +and configure the hardware to use the DAWR. This will run at full +speed since it can use the hardware emulation. Unfortunately if this +guest is migrated to a POWER9 host, the watchpoint will be lost on the +POWER9. Loads and stores to the watchpoint locations will not be +trapped in GDB. The watchpoint is remembered, so if the guest is +migrated back to the POWER8 host, it will start working again. 
+ +Force enabling the DAWR +======================= +Kernels (since ~v5.2) have an option to force enable the DAWR via:: + + echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous + +This enables the DAWR even on POWER9. + +This is a dangerous setting, USE AT YOUR OWN RISK. + +Some users may not care about a bad user crashing their box +(ie. single user/desktop systems) and really want the DAWR. This +allows them to force enable DAWR. + +This flag can also be used to disable DAWR access. Once this is +cleared, all DAWR access should be cleared immediately and your +machine once again safe from crashing. + +Userspace may get confused by toggling this. If DAWR is force +enabled/disabled between getting the number of breakpoints (via +PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an +inconsistent view of what's available. Similarly for guests. + +For the DAWR to be enabled in a KVM guest, the DAWR needs to be force +enabled in the host AND the guest. For this reason, this won't work on +POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the +dawr_enable_dangerous file will fail if the hypervisor doesn't support +writing the DAWR. + +To double check the DAWR is working, run this kernel selftest: + + tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c + +Any errors/failures/skips mean something is wrong. diff --git a/Documentation/arch/powerpc/dexcr.rst b/Documentation/arch/powerpc/dexcr.rst new file mode 100644 index 0000000000..615a631f51 --- /dev/null +++ b/Documentation/arch/powerpc/dexcr.rst @@ -0,0 +1,58 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +========================================== +DEXCR (Dynamic Execution Control Register) +========================================== + +Overview +======== + +The DEXCR is a privileged special purpose register (SPR) introduced in +PowerPC ISA 3.1B (Power10) that allows per-cpu control over several dynamic +execution behaviours. 
These behaviours include speculation (e.g., indirect +branch target prediction) and enabling return-oriented programming (ROP) +protection instructions. + +The execution control is exposed in hardware as up to 32 bits ('aspects') in +the DEXCR. Each aspect controls a certain behaviour, and can be set or cleared +to enable/disable the aspect. There are several variants of the DEXCR for +different purposes: + +DEXCR + A privileged SPR that can control aspects for userspace and kernel space +HDEXCR + A hypervisor-privileged SPR that can control aspects for the hypervisor and + enforce aspects for the kernel and userspace. +UDEXCR + An optional ultravisor-privileged SPR that can control aspects for the ultravisor. + +Userspace can examine the current DEXCR state using a dedicated SPR that +provides a non-privileged read-only view of the userspace DEXCR aspects. +There is also an SPR that provides a read-only view of the hypervisor enforced +aspects, which ORed with the userspace DEXCR view gives the effective DEXCR +state for a process. + + +Configuration +============= + +The DEXCR is currently unconfigurable. All threads are run with the +NPHIE aspect enabled. + + +coredump and ptrace +=================== + +The userspace values of the DEXCR and HDEXCR (in this order) are exposed under +``NT_PPC_DEXCR``. These are each 64 bits and readonly, and are intended to +assist with core dumps. The DEXCR may be made writable in future. The top 32 +bits of both registers (corresponding to the non-userspace bits) are masked off. + +If the kernel config ``CONFIG_CHECKPOINT_RESTORE`` is enabled, then +``NT_PPC_HASHKEYR`` is available and exposes the HASHKEYR value of the process +for reading and writing. This is a tradeoff between increased security and +checkpoint/restore support: a process should normally have no need to know its +secret key, but restoring a process requires setting its original key. 
The key +therefore appears in core dumps, and an attacker may be able to retrieve it from +a coredump and effectively bypass ROP protection on any threads that share this +key (potentially all threads from the same parent that have not run ``exec()``). diff --git a/Documentation/arch/powerpc/dscr.rst b/Documentation/arch/powerpc/dscr.rst new file mode 100644 index 0000000000..f735ec5375 --- /dev/null +++ b/Documentation/arch/powerpc/dscr.rst @@ -0,0 +1,87 @@ +=================================== +DSCR (Data Stream Control Register) +=================================== + +DSCR register in powerpc allows user to have some control of prefetch of data +stream in the processor. Please refer to the ISA documents or related manual +for more detailed information regarding how to use this DSCR to attain this +control of the prefetches . This document here provides an overview of kernel +support for DSCR, related kernel objects, its functionalities and exported +user interface. + +(A) Data Structures: + + (1) thread_struct:: + + dscr /* Thread DSCR value */ + dscr_inherit /* Thread has changed default DSCR */ + + (2) PACA:: + + dscr_default /* per-CPU DSCR default value */ + + (3) sysfs.c:: + + dscr_default /* System DSCR default value */ + +(B) Scheduler Changes: + + Scheduler will write the per-CPU DSCR default which is stored in the + CPU's PACA value into the register if the thread has dscr_inherit value + cleared which means that it has not changed the default DSCR till now. + If the dscr_inherit value is set which means that it has changed the + default DSCR value, scheduler will write the changed value which will + now be contained in thread struct's dscr into the register instead of + the per-CPU default PACA based DSCR value. + + NOTE: Please note here that the system wide global DSCR value never + gets used directly in the scheduler process context switch at all. 
+ +(C) SYSFS Interface: + + - Global DSCR default: /sys/devices/system/cpu/dscr_default + - CPU specific DSCR default: /sys/devices/system/cpu/cpuN/dscr + + Changing the global DSCR default in the sysfs will change all the CPU + specific DSCR defaults immediately in their PACA structures. Again if + the current process has the dscr_inherit clear, it also writes the new + value into every CPU's DSCR register right away and updates the current + thread's DSCR value as well. + + Changing the CPU specific DSCR default value in the sysfs does exactly + the same thing as above but unlike the global one above, it just changes + stuff for that particular CPU instead for all the CPUs on the system. + +(D) User Space Instructions: + + The DSCR register can be accessed in the user space using any of these + two SPR numbers available for that purpose. + + (1) Problem state SPR: 0x03 (Un-privileged, POWER8 only) + (2) Privileged state SPR: 0x11 (Privileged) + + Accessing DSCR through privileged SPR number (0x11) from user space + works, as it is emulated following an illegal instruction exception + inside the kernel. Both mfspr and mtspr instructions are emulated. + + Accessing DSCR through user level SPR (0x03) from user space will first + create a facility unavailable exception. Inside this exception handler + all mfspr instruction based read attempts will get emulated and returned + where as the first mtspr instruction based write attempts will enable + the DSCR facility for the next time around (both for read and write) by + setting DSCR facility in the FSCR register. + +(E) Specifics about 'dscr_inherit': + + The thread struct element 'dscr_inherit' represents whether the thread + in question has attempted and changed the DSCR itself using any of the + following methods. This element signifies whether the thread wants to + use the CPU default DSCR value or its own changed DSCR value in the + kernel. 
+ + (1) mtspr instruction (SPR number 0x03) + (2) mtspr instruction (SPR number 0x11) + (3) ptrace interface (Explicitly set user DSCR value) + + Any child of the process created after this event in the process inherits + this same behaviour as well. diff --git a/Documentation/arch/powerpc/eeh-pci-error-recovery.rst b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst new file mode 100644 index 0000000000..d6643a91bd --- /dev/null +++ b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst @@ -0,0 +1,336 @@ +========================== +PCI Bus EEH Error Recovery +========================== + +Linas Vepstas + +12 January 2005 + + +Overview: +--------- +The IBM POWER-based pSeries and iSeries computers include PCI bus +controller chips that have extended capabilities for detecting and +reporting a large variety of PCI bus error conditions. These features +go under the name of "EEH", for "Enhanced Error Handling". The EEH +hardware features allow PCI bus errors to be cleared and a PCI +card to be "rebooted", without also having to reboot the operating +system. + +This is in contrast to traditional PCI error handling, where the +PCI chip is wired directly to the CPU, and an error would cause +a CPU machine-check/check-stop condition, halting the CPU entirely. +Another "traditional" technique is to ignore such errors, which +can lead to data corruption, both of user data or of kernel data, +hung/unresponsive adapters, or system crashes/lockups. Thus, +the idea behind EEH is that the operating system can become more +reliable and robust by protecting it from PCI errors, and giving +the OS the ability to "reboot"/recover individual PCI devices. + +Future systems from other vendors, based on the PCI-E specification, +may contain similar features. + + +Causes of EEH Errors +-------------------- +EEH was originally designed to guard against hardware failure, such +as PCI cards dying from heat, humidity, dust, vibration and bad +electrical connections. 
The vast majority of EEH errors seen in +"real life" are due to either poorly seated PCI cards, or, +unfortunately quite commonly, due to device driver bugs, device firmware +bugs, and sometimes PCI card hardware bugs. + +The most common software bug, is one that causes the device to +attempt to DMA to a location in system memory that has not been +reserved for DMA access for that card. This is a powerful feature, +as it prevents what; otherwise, would have been silent memory +corruption caused by the bad DMA. A number of device driver +bugs have been found and fixed in this way over the past few +years. Other possible causes of EEH errors include data or +address line parity errors (for example, due to poor electrical +connectivity due to a poorly seated card), and PCI-X split-completion +errors (due to software, device firmware, or device PCI hardware bugs). +The vast majority of "true hardware failures" can be cured by +physically removing and re-seating the PCI card. + + +Detection and Recovery +---------------------- +In the following discussion, a generic overview of how to detect +and recover from EEH errors will be presented. This is followed +by an overview of how the current implementation in the Linux +kernel does it. The actual implementation is subject to change, +and some of the finer points are still being debated. These +may in turn be swayed if or when other architectures implement +similar functionality. + +When a PCI Host Bridge (PHB, the bus controller connecting the +PCI bus to the system CPU electronics complex) detects a PCI error +condition, it will "isolate" the affected PCI card. Isolation +will block all writes (either to the card from the system, or +from the card to the system), and it will cause all reads to +return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads). +This value was chosen because it is the same value you would +get if the device was physically unplugged from the slot. 
+This includes access to PCI memory, I/O space, and PCI config +space. Interrupts; however, will continue to be delivered. + +Detection and recovery are performed with the aid of ppc64 +firmware. The programming interfaces in the Linux kernel +into the firmware are referred to as RTAS (Run-Time Abstraction +Services). The Linux kernel does not (should not) access +the EEH function in the PCI chipsets directly, primarily because +there are a number of different chipsets out there, each with +different interfaces and quirks. The firmware provides a +uniform abstraction layer that will work with all pSeries +and iSeries hardware (and be forwards-compatible). + +If the OS or device driver suspects that a PCI slot has been +EEH-isolated, there is a firmware call it can make to determine if +this is the case. If so, then the device driver should put itself +into a consistent state (given that it won't be able to complete any +pending work) and start recovery of the card. Recovery normally +would consist of resetting the PCI device (holding the PCI #RST +line high for two seconds), followed by setting up the device +config space (the base address registers (BAR's), latency timer, +cache line size, interrupt line, and so on). This is followed by a +reinitialization of the device driver. In a worst-case scenario, +the power to the card can be toggled, at least on hot-plug-capable +slots. In principle, layers far above the device driver probably +do not need to know that the PCI card has been "rebooted" in this +way; ideally, there should be at most a pause in Ethernet/disk/USB +I/O while the card is being reset. + +If the card cannot be recovered after three or four resets, the +kernel/device driver should assume the worst-case scenario, that the +card has died completely, and report this error to the sysadmin. +In addition, error messages are reported through RTAS and also through +syslogd (/var/log/messages) to alert the sysadmin of PCI resets. 
+The correct way to deal with failed adapters is to use the standard +PCI hotplug tools to remove and replace the dead card. + + +Current PPC64 Linux EEH Implementation +-------------------------------------- +At this time, a generic EEH recovery mechanism has been implemented, +so that individual device drivers do not need to be modified to support +EEH recovery. This generic mechanism piggy-backs on the PCI hotplug +infrastructure, and percolates events up through the userspace/udev +infrastructure. Following is a detailed description of how this is +accomplished. + +EEH must be enabled in the PHB's very early during the boot process, +and if a PCI slot is hot-plugged. The former is performed by +eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by +drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code. +EEH must be enabled before a PCI scan of the device can proceed. +Current Power5 hardware will not work unless EEH is enabled; +although older Power4 can run with it disabled. Effectively, +EEH can no longer be turned off. PCI devices *must* be +registered with the EEH code; the EEH code needs to know about +the I/O address ranges of the PCI device in order to detect an +error. Given an arbitrary address, the routine +pci_get_device_by_addr() will find the pci device associated +with that address (if any). + +The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(), +etc. include a check to see if the i/o read returned all-0xff's. +If so, these make a call to eeh_dn_check_failure(), which in turn +asks the firmware if the all-ff's value is the sign of a true EEH +error. If it is not, processing continues as normal. The grand +total number of these false alarms or "false positives" can be +seen in /proc/ppc64/eeh (subject to change). Normally, almost +all of these occur during boot, when the PCI bus is scanned, where +a large number of 0xff reads are part of the bus scan procedure. 
+ +If a frozen slot is detected, code in +arch/powerpc/platforms/pseries/eeh.c will print a stack trace to +syslog (/var/log/messages). This stack trace has proven to be very +useful to device-driver authors for finding out at what point the EEH +error was detected, as the error itself usually occurs slightly +beforehand. + +Next, it uses the Linux kernel notifier chain/work queue mechanism to +allow any interested parties to find out about the failure. Device +drivers, or other parts of the kernel, can use +`eeh_register_notifier(struct notifier_block *)` to find out about EEH +events. The event will include a pointer to the pci device, the +device node and some state info. Receivers of the event can "do as +they wish"; the default handler will be described further in this +section. + +To assist in the recovery of the device, eeh.c exports the +following functions: + +rtas_set_slot_reset() + assert the PCI #RST line for 1/8th of a second +rtas_configure_bridge() + ask firmware to configure any PCI bridges + located topologically under the pci slot. +eeh_save_bars() and eeh_restore_bars(): + save and restore the PCI + config-space info for a device and any devices under it. + + +A handler for the EEH notifier_block events is implemented in +drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events(). +It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter(). +This last call causes the device driver for the card to be stopped, +which causes uevents to go out to user space. This triggers +user-space scripts that might issue commands such as "ifdown eth0" +for ethernet cards, and so on. This handler then sleeps for 5 seconds, +hoping to give the user-space scripts enough time to complete. +It then resets the PCI card, reconfigures the device BAR's, and +any bridges underneath. It then calls rpaphp_enable_pci_slot(), +which restarts the device driver and triggers more user-space +events (for example, calling "ifup eth0" for ethernet cards). 
+ + +Device Shutdown and User-Space Events +------------------------------------- +This section documents what happens when a pci slot is unconfigured, +focusing on how the device driver gets shut down, and on how the +events get delivered to user-space scripts. + +Following is an example sequence of events that cause a device driver +close function to be called during the first phase of an EEH reset. +The following sequence is an example of the pcnet32 device driver:: + + rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c + { + calls + pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c + { + calls + pci_destroy_dev (struct pci_dev *) + { + calls + device_unregister (&dev->dev) // in /drivers/base/core.c + { + calls + device_del (struct device *) + { + calls + bus_remove_device() // in /drivers/base/bus.c + { + calls + device_release_driver() + { + calls + struct device_driver->remove() which is just + pci_device_remove() // in /drivers/pci/pci_driver.c + { + calls + struct pci_driver->remove() which is just + pcnet32_remove_one() // in /drivers/net/pcnet32.c + { + calls + unregister_netdev() // in /net/core/dev.c + { + calls + dev_close() // in /net/core/dev.c + { + calls dev->stop(); + which is just pcnet32_close() // in pcnet32.c + { + which does what you wanted + to stop the device + } + } + } + which + frees pcnet32 device driver memory + } + }}}}}} + + +in drivers/pci/pci_driver.c, +struct device_driver->remove() is just pci_device_remove() +which calls struct pci_driver->remove() which is pcnet32_remove_one() +which calls unregister_netdev() (in net/core/dev.c) +which calls dev_close() (in net/core/dev.c) +which calls dev->stop() which is pcnet32_close() +which then does the appropriate shutdown. 
+ +--- + +Following is the analogous stack trace for events sent to user-space +when the pci device is unconfigured:: + + rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c + calls + pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c + calls + pci_destroy_dev (struct pci_dev *) { + calls + device_unregister (&dev->dev) { // in /drivers/base/core.c + calls + device_del(struct device * dev) { // in /drivers/base/core.c + calls + kobject_del() { //in /libs/kobject.c + calls + kobject_uevent() { // in /libs/kobject.c + calls + kset_uevent() { // in /lib/kobject.c + calls + kset->uevent_ops->uevent() // which is really just + a call to + dev_uevent() { // in /drivers/base/core.c + calls + dev->bus->uevent() which is really just a call to + pci_uevent () { // in drivers/pci/hotplug.c + which prints device name, etc.... + } + } + then kobject_uevent() sends a netlink uevent to userspace + --> userspace uevent + (during early boot, nobody listens to netlink events and + kobject_uevent() executes uevent_helper[], which runs the + event process /sbin/hotplug) + } + } + kobject_del() then calls sysfs_remove_dir(), which would + trigger any user-space daemon that was watching /sysfs, + and notice the delete event. + + +Pro's and Con's of the Current Design +------------------------------------- +There are several issues with the current EEH software recovery design, +which may be addressed in future revisions. But first, note that the +big plus of the current design is that no changes need to be made to +individual device drivers, so that the current design throws a wide net. +The biggest negative of the design is that it potentially disturbs +network daemons and file systems that didn't need to be disturbed. + +- A minor complaint is that resetting the network card causes + user-space back-to-back ifdown/ifup burps that potentially disturb + network daemons, that didn't need to even know that the pci + card was being rebooted. 
+ +- A more serious concern is that the same reset, for SCSI devices, + causes havoc to mounted file systems. Scripts cannot post-facto + unmount a file system without flushing pending buffers, but this + is impossible, because I/O has already been stopped. Thus, + ideally, the reset should happen at or below the block layer, + so that the file systems are not disturbed. + + Reiserfs does not tolerate errors returned from the block device. + Ext3fs seems to be tolerant, retrying reads/writes until it does + succeed. Both have been only lightly tested in this scenario. + + The SCSI-generic subsystem already has built-in code for performing + SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter + (HBA) resets. These are cascaded into a chain of attempted + resets if a SCSI command fails. These are completely hidden + from the block layer. It would be very natural to add an EEH + reset into this chain of events. + +- If a SCSI error occurs for the root device, all is lost unless + the sysadmin had the foresight to run /bin, /sbin, /etc, /var + and so on, out of ramdisk/tmpfs. + + +Conclusions +----------- +There's forward progress ... diff --git a/Documentation/arch/powerpc/elf_hwcaps.rst b/Documentation/arch/powerpc/elf_hwcaps.rst new file mode 100644 index 0000000000..4c896cf077 --- /dev/null +++ b/Documentation/arch/powerpc/elf_hwcaps.rst @@ -0,0 +1,231 @@ +.. _elf_hwcaps_powerpc: + +================== +POWERPC ELF HWCAPs +================== + +This document describes the usage and semantics of the powerpc ELF HWCAPs. + + +1. Introduction +--------------- + +Some hardware or software features are only available on some CPU +implementations, and/or with certain kernel configurations, but have no other +discovery mechanism available to userspace code. The kernel exposes the +presence of these features to userspace through a set of flags called HWCAPs, +exposed in the auxiliary vector. 
+ +Userspace software can test for features by acquiring the AT_HWCAP or +AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant +flags are set, e.g.:: + + bool floating_point_is_present(void) + { + unsigned long HWCAPs = getauxval(AT_HWCAP); + if (HWCAPs & PPC_FEATURE_HAS_FPU) + return true; + + return false; + } + +Where software relies on a feature described by a HWCAP, it should check the +relevant HWCAP flag to verify that the feature is present before attempting to +make use of the feature. + +HWCAP is the preferred method to test for the presence of a feature rather +than probing through other means, which may not be reliable or may cause +unpredictable behaviour. + +Software that targets a particular platform does not necessarily have to +test for required or implied features. For example if the program requires +FPU, VMX, VSX, it is not necessary to test those HWCAPs, and it may be +impossible to do so if the compiler generates code requiring those features. + +2. Facilities +------------- + +The Power ISA uses the term "facility" to describe a class of instructions, +registers, interrupts, etc. The presence or absence of a facility indicates +whether this class is available to be used, but the specifics depend on the +ISA version. For example, if the VSX facility is available, the VSX +instructions that can be used differ between the v3.0B and v3.1B ISA +versions. + +3. Categories +------------- + +The Power ISA before v3.0 uses the term "category" to describe certain +classes of instructions and operating modes which may be optional or +mutually exclusive, the exact meaning of the HWCAP flag may depend on +context, e.g., the presence of the BOOKE feature implies that the server +category is not implemented. + +4. HWCAP allocation +------------------- + +HWCAPs are allocated as described in Power Architecture 64-Bit ELF V2 ABI +Specification (which will be reflected in the kernel's uapi headers). + +5. 
The HWCAPs exposed in AT_HWCAP +--------------------------------- + +PPC_FEATURE_32 + 32-bit CPU + +PPC_FEATURE_64 + 64-bit CPU (userspace may be running in 32-bit mode). + +PPC_FEATURE_601_INSTR + The processor is PowerPC 601. + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_HAS_ALTIVEC + Vector (aka Altivec, VMX) facility is available. + +PPC_FEATURE_HAS_FPU + Floating point facility is available. + +PPC_FEATURE_HAS_MMU + Memory management unit is present and enabled. + +PPC_FEATURE_HAS_4xxMAC + The processor is 40x or 44x family. + +PPC_FEATURE_UNIFIED_CACHE + The processor has a unified L1 cache for instructions and data, as + found in NXP e200. + Unused in the kernel since 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") + +PPC_FEATURE_HAS_SPE + Signal Processing Engine facility is available. + +PPC_FEATURE_HAS_EFP_SINGLE + Embedded Floating Point single precision operations are available. + +PPC_FEATURE_HAS_EFP_DOUBLE + Embedded Floating Point double precision operations are available. + +PPC_FEATURE_NO_TB + The timebase facility (mftb instruction) is not available. + This is a 601 specific HWCAP, so if it is known that the processor + running is not a 601, via other HWCAPs or other means, it is not + required to test this bit before using the timebase. + Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") + +PPC_FEATURE_POWER4 + The processor is POWER4 or PPC970/FX/MP. + POWER4 support dropped from the kernel since 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") + +PPC_FEATURE_POWER5 + The processor is POWER5. + +PPC_FEATURE_POWER5_PLUS + The processor is POWER5+. + +PPC_FEATURE_CELL + The processor is Cell. + +PPC_FEATURE_BOOKE + The processor implements the embedded category ("BookE") architecture. + +PPC_FEATURE_SMT + The processor implements SMT. 
+ +PPC_FEATURE_ICACHE_SNOOP + The processor icache is coherent with the dcache, and instruction storage + can be made consistent with data storage for the purpose of executing + instructions with the sequence (as described in, e.g., POWER9 Processor + User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)):: + + sync + icbi (to any address) + isync + +PPC_FEATURE_ARCH_2_05 + The processor supports the v2.05 userlevel architecture. Processors + supporting later architectures DO NOT set this feature. + +PPC_FEATURE_PA6T + The processor is PA6T. + +PPC_FEATURE_HAS_DFP + DFP facility is available. + +PPC_FEATURE_POWER6_EXT + The processor is POWER6. + +PPC_FEATURE_ARCH_2_06 + The processor supports the v2.06 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE_HAS_VSX + VSX facility is available. + +PPC_FEATURE_PSERIES_PERFMON_COMPAT + The processor supports architected PMU events in the range 0xE0-0xFF. + +PPC_FEATURE_TRUE_LE + The processor supports true little-endian mode. + +PPC_FEATURE_PPC_LE + The processor supports "PowerPC Little-Endian", that uses address + munging to make storage access appear to be little-endian, but the + data is stored in a different format that is unsuitable to be + accessed by other agents not running in this mode. + +6. The HWCAPs exposed in AT_HWCAP2 +---------------------------------- + +PPC_FEATURE2_ARCH_2_07 + The processor supports the v2.07 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HTM + Transactional Memory feature is available. + +PPC_FEATURE2_DSCR + DSCR facility is available. + +PPC_FEATURE2_EBB + EBB facility is available. + +PPC_FEATURE2_ISEL + isel instruction is available. This is superseded by ARCH_2_07 and + later. + +PPC_FEATURE2_TAR + TAR facility is available. + +PPC_FEATURE2_VEC_CRYPTO + v2.07 crypto instructions are available. 
+ +PPC_FEATURE2_HTM_NOSC + System calls fail if called in a transactional state, see + Documentation/arch/powerpc/syscall64-abi.rst + +PPC_FEATURE2_ARCH_3_00 + The processor supports the v3.0B / v3.0C userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_HAS_IEEE128 + IEEE 128-bit binary floating point is supported with VSX + quad-precision instructions and data types. + +PPC_FEATURE2_DARN + darn instruction is available. + +PPC_FEATURE2_SCV + The scv 0 instruction may be used for system calls, see + Documentation/arch/powerpc/syscall64-abi.rst. + +PPC_FEATURE2_HTM_NO_SUSPEND + A limited Transactional Memory facility that does not support suspend is + available, see Documentation/arch/powerpc/transactional_memory.rst. + +PPC_FEATURE2_ARCH_3_1 + The processor supports the v3.1 userlevel architecture. Processors + supporting later architectures also set this feature. + +PPC_FEATURE2_MMA + MMA facility is available. diff --git a/Documentation/arch/powerpc/elfnote.rst b/Documentation/arch/powerpc/elfnote.rst new file mode 100644 index 0000000000..3ec8d61e9a --- /dev/null +++ b/Documentation/arch/powerpc/elfnote.rst @@ -0,0 +1,41 @@ +========================== +ELF Note PowerPC Namespace +========================== + +The PowerPC namespace in an ELF Note of the kernel binary is used to store +capabilities and information which can be used by a bootloader or userland. + +Types and Descriptors +--------------------- + +The types to be used with the "PowerPC" namespace are defined in [#f1]_. + + 1) PPC_ELFNOTE_CAPABILITIES + +Define the capabilities supported/required by the kernel. This type uses a +bitmap as "descriptor" field. Each bit is described below: + +- Ultravisor-capable bit (PowerNV only). + +.. code-block:: c + + #define PPCCAP_ULTRAVISOR_BIT (1 << 0) + +Indicate that the powerpc kernel binary knows how to run in an +ultravisor-enabled system. 
+ +In an ultravisor-enabled system, some machine resources are now controlled +by the ultravisor. If the kernel is not ultravisor-capable, but it ends up +being run on a machine with ultravisor, the kernel will probably crash +trying to access ultravisor resources. For instance, it may crash in early +boot trying to set the partition table entry 0. + +In an ultravisor-enabled system, a bootloader could warn the user or prevent +the kernel from being run if the PowerPC ultravisor capability doesn't exist +or the Ultravisor-capable bit is not set. + +References +---------- + +.. [#f1] arch/powerpc/include/asm/elfnote.h + diff --git a/Documentation/arch/powerpc/features.rst b/Documentation/arch/powerpc/features.rst new file mode 100644 index 0000000000..ee4b95e042 --- /dev/null +++ b/Documentation/arch/powerpc/features.rst @@ -0,0 +1,3 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. kernel-feat:: features powerpc diff --git a/Documentation/arch/powerpc/firmware-assisted-dump.rst b/Documentation/arch/powerpc/firmware-assisted-dump.rst new file mode 100644 index 0000000000..e363fc4852 --- /dev/null +++ b/Documentation/arch/powerpc/firmware-assisted-dump.rst @@ -0,0 +1,381 @@ +====================== +Firmware-Assisted Dump +====================== + +July 2011 + +The goal of firmware-assisted dump is to enable the dump of +a crashed system, and to do so from a fully-reset system, and +to minimize the total elapsed time until the system is back +in production use. + +- Firmware-Assisted Dump (FADump) infrastructure is intended to replace + the existing phyp assisted dump. +- Fadump uses the same firmware interfaces and memory reservation model + as phyp assisted dump. +- Unlike phyp dump, FADump exports the memory dump through /proc/vmcore + in the ELF format in the same way as kdump. This helps us reuse the + kdump infrastructure for dump capture and filtering. +- Unlike phyp dump, userspace tool does not need to refer any sysfs + interface while reading /proc/vmcore. 
+- Unlike phyp dump, FADump allows user to release all the memory reserved + for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem. +- Once enabled through kernel boot parameter, FADump can be + started/stopped through /sys/kernel/fadump_registered interface (see + sysfs files section below) and can be easily integrated with kdump + service start/stop init scripts. + +Comparing with kdump or other strategies, firmware-assisted +dump offers several strong, practical advantages: + +- Unlike kdump, the system has been reset, and loaded + with a fresh copy of the kernel. In particular, + PCI and I/O devices have been reinitialized and are + in a clean, consistent state. +- Once the dump is copied out, the memory that held the dump + is immediately available to the running kernel. And therefore, + unlike kdump, FADump doesn't need a 2nd reboot to get back + the system to the production configuration. + +The above can only be accomplished by coordination with, +and assistance from the Power firmware. The procedure is +as follows: + +- The first kernel registers the sections of memory with the + Power firmware for dump preservation during OS initialization. + These registered sections of memory are reserved by the first + kernel during early boot. + +- When system crashes, the Power firmware will copy the registered + low memory regions (boot memory) from source to destination area. + It will also save hardware PTE's. + + NOTE: + The term 'boot memory' means size of the low memory chunk + that is required for a kernel to boot successfully when + booted with restricted memory. By default, the boot memory + size will be the larger of 5% of system RAM or 256MB. + Alternatively, user can also specify boot memory size + through boot parameter 'crashkernel=' which will override + the default calculated size. Use this option if default + boot memory size is not sufficient for second kernel to + boot successfully. 
For syntax of crashkernel= parameter, + refer to Documentation/admin-guide/kdump/kdump.rst. If any + offset is provided in crashkernel= parameter, it will be + ignored as FADump uses a predefined offset to reserve memory + for boot memory dump preservation in case of a crash. + +- After the low memory (boot memory) area has been saved, the + firmware will reset PCI and other hardware state. It will + *not* clear the RAM. It will then launch the bootloader, as + normal. + +- The freshly booted kernel will notice that there is a new node + (rtas/ibm,kernel-dump on pSeries or ibm,opal/dump/mpipl-boot + on OPAL platform) in the device tree, indicating that + there is crash data available from a previous boot. During + the early boot OS will reserve rest of the memory above + boot memory size effectively booting with restricted memory + size. This will make sure that this kernel (also, referred + to as second kernel or capture kernel) will not touch any + of the dump memory area. + +- User-space tools will read /proc/vmcore to obtain the contents + of memory, which holds the previous crashed kernel dump in ELF + format. The userspace tools may copy this info to disk, or + network, nas, san, iscsi, etc. as desired. + +- Once the userspace tool is done saving dump, it will echo + '1' to /sys/kernel/fadump_release_mem to release the reserved + memory back to general use, except the memory required for + next firmware-assisted dump registration. + + e.g.:: + + # echo 1 > /sys/kernel/fadump_release_mem + +Please note that the firmware-assisted dump feature +is only available on POWER6 and above systems on pSeries +(PowerVM) platform and POWER9 and above systems with OP940 +or later firmware versions on PowerNV (OPAL) platform. +Note that, OPAL firmware exports ibm,opal/dump node when +FADump is supported on PowerNV platform. + +On OPAL based machines, system first boots into an intermediate +kernel (referred to as petitboot kernel) before booting into the +capture kernel.
This kernel would have minimal kernel and/or +userspace support to process crash data. Such kernel needs to +preserve previously crashed kernel's memory for the subsequent +capture kernel boot to process this crash data. Kernel config +option CONFIG_PRESERVE_FA_DUMP has to be enabled on such kernel +to ensure that crash data is preserved to process later. + +-- On OPAL based machines (PowerNV), if the kernel is built with + CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also + exported as /sys/firmware/opal/mpipl/core file. This sysfs file is + helpful in debugging OPAL crashes with GDB. The kernel memory + used for exporting this sysfs file can be released by echo'ing + '1' to /sys/firmware/opal/mpipl/release_core node. + + e.g. + # echo 1 > /sys/firmware/opal/mpipl/release_core + +Implementation details: +----------------------- + +During boot, a check is made to see if firmware supports +this feature on that particular machine. If it does, then +we check to see if an active dump is waiting for us. If yes +then everything but boot memory size of RAM is reserved during +early boot (See Fig. 2). This area is released once we finish +collecting the dump from user land scripts (e.g. kdump scripts) +that are run. If there is dump data, then the +/sys/kernel/fadump_release_mem file is created, and the reserved +memory is held. + +If there is no waiting dump data, then only the memory required to +hold CPU state, HPTE region, boot memory dump, FADump header and +elfcore header, is usually reserved at an offset greater than boot +memory size (see Fig. 1). This area is *not* released: this region +will be kept permanently reserved, so that it can act as a receptacle +for a copy of the boot memory content in addition to CPU state and +HPTE region, in the case a crash does occur. + +Since this reserved memory area is used only after the system crash, +there is no point in blocking this significant chunk of memory from +production kernel.
Hence, the implementation uses the Linux kernel's +Contiguous Memory Allocator (CMA) for memory reservation if CMA is +configured for kernel. With CMA reservation this memory will be +available for applications to use it, while kernel is prevented from +using it. With this FADump will still be able to capture all of the +kernel memory and most of the user space memory except the user pages +that were present in CMA region:: + + o Memory Reservation during first kernel + + Low memory Top of memory + 0 boot memory size |<--- Reserved dump area --->| | + | | | Permanent Reservation | | + V V | | V + +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ + | | |///|////| DUMP | HDR | ELF |////| | + +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ + | ^ ^ ^ ^ ^ + | | | | | | + \ CPU HPTE / | | + ------------------------------ | | + Boot memory content gets transferred | | + to reserved area by firmware at the | | + time of crash. | | + FADump Header | + (meta area) | + | + | + Metadata: This area holds a metadata structure whose + address is registered with f/w and retrieved in the + second kernel after crash, on platforms that support + tags (OPAL). Having such structure with info needed + to process the crashdump eases dump capture process. + + Fig. 1 + + + o Memory Reservation during second kernel after crash + + Low memory Top of memory + 0 boot memory size | + | |<------------ Crash preserved area ------------>| + V V |<--- Reserved dump area --->| | + +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ + | | |///|////| DUMP | HDR | ELF |////| | + +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ + | | + V V + Used by second /proc/vmcore + kernel to boot + + +---+ + |///| -> Regions (CPU, HPTE & Metadata) marked like this in the above + +---+ figures are not always present. For example, OPAL platform + does not have CPU & HPTE regions while Metadata region is + not supported on pSeries currently. + + Fig. 
2 + + +Currently the dump will be copied from /proc/vmcore to a new file upon +user intervention. The dump data available through /proc/vmcore will be +in ELF format. Hence the existing kdump infrastructure (kdump scripts) +to save the dump works fine with minor modifications. KDump scripts on +major Distro releases have already been modified to work seamlessly (no +user intervention in saving the dump) when FADump is used, instead of +KDump, as dump mechanism. + +The tools to examine the dump will be the same as the ones +used for kdump. + +How to enable firmware-assisted dump (FADump): +---------------------------------------------- + +1. Set config option CONFIG_FA_DUMP=y and build kernel. +2. Boot into linux kernel with 'fadump=on' kernel cmdline option. + By default, FADump reserved memory will be initialized as CMA area. + Alternatively, user can boot linux kernel with 'fadump=nocma' to + prevent FADump from using CMA. +3. Optionally, user can also set 'crashkernel=' kernel cmdline + to specify size of the memory to reserve for boot memory dump + preservation. + +NOTE: + 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead + use 'crashkernel=' to specify size of the memory to reserve + for boot memory dump preservation. + 2. If firmware-assisted dump fails to reserve memory then it + will fall back to existing kdump mechanism if 'crashkernel=' + option is set at kernel cmdline. + 3. If user wants to capture all of user space memory and is ok with + reserved memory not available to production system, then + 'fadump=nocma' kernel parameter can be used to fall back to + old behaviour. + +Sysfs/debugfs files: +-------------------- + +Firmware-assisted dump feature uses sysfs file system to hold +the control files and debugfs file to display memory reserved region. + +Here is the list of files under kernel sysfs: + + /sys/kernel/fadump_enabled + This is used to display the FADump status.
+ + - 0 = FADump is disabled + - 1 = FADump is enabled + + This interface can be used by kdump init scripts to identify if + FADump is enabled in the kernel and act accordingly. + + /sys/kernel/fadump_registered + This is used to display the FADump registration status as well + as to control (start/stop) the FADump registration. + + - 0 = FADump is not registered. + - 1 = FADump is registered and ready to handle system crash. + + To register FADump echo 1 > /sys/kernel/fadump_registered and + echo 0 > /sys/kernel/fadump_registered to un-register and stop + FADump. Once the FADump is un-registered, the system crash will not + be handled and vmcore will not be captured. This interface can be + easily integrated with kdump service start/stop. + + /sys/kernel/fadump/mem_reserved + + This is used to display the memory reserved by FADump for saving the + crash dump. + + /sys/kernel/fadump_release_mem + This file is available only when FADump is active during + second kernel. This is used to release the reserved memory + regions that are held for saving crash dump. To release the + reserved memory echo 1 to it:: + + echo 1 > /sys/kernel/fadump_release_mem + + After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region + file will change to reflect the new memory reservations. + + The existing userspace tools (kdump infrastructure) can be easily + enhanced to use this interface to release the memory reserved for + dump and continue without 2nd reboot. + +Note: /sys/kernel/fadump_release_opalcore sysfs has moved to + /sys/firmware/opal/mpipl/release_core + + /sys/firmware/opal/mpipl/release_core + + This file is available only on OPAL based machines when FADump is + active during capture kernel. This is used to release the memory + used by the kernel to export /sys/firmware/opal/mpipl/core file. To + release this memory, echo '1' to it:: + + echo 1 > /sys/firmware/opal/mpipl/release_core + +Note: The following FADump sysfs files are deprecated.
+ ++----------------------------------+--------------------------------+ +| Deprecated | Alternative | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem | ++----------------------------------+--------------------------------+ + +Here is the list of files under powerpc debugfs: +(Assuming debugfs is mounted on /sys/kernel/debug directory.) + + /sys/kernel/debug/powerpc/fadump_region + This file shows the reserved memory regions if FADump is + enabled otherwise this file is empty. The output format + is:: + + : [-] bytes, Dumped: + + and for kernel DUMP region is: + + DUMP: Src: , Dest: , Size: , Dumped: # bytes + + e.g. + Contents when FADump is registered during first kernel:: + + # cat /sys/kernel/debug/powerpc/fadump_region + CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0 + HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0 + DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0 + + Contents when FADump is active during second kernel:: + + # cat /sys/kernel/debug/powerpc/fadump_region + CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020 + HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000 + DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000 + : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000 + + +NOTE: + Please refer to Documentation/filesystems/debugfs.rst on + how to mount the debugfs filesystem. 
+ + +TODO: +----- + - Need to come up with the better approach to find out more + accurate boot memory size that is required for a kernel to + boot successfully when booted with restricted memory. + - The FADump implementation introduces a FADump crash info structure + in the scratch area before the ELF core header. The idea of introducing + this structure is to pass some important crash info data to the second + kernel which will help second kernel to populate ELF core header with + correct data before it gets exported through /proc/vmcore. The current + design implementation does not address a possibility of introducing + additional fields (in future) to this structure without affecting + compatibility. Need to come up with the better approach to address this. + + The possible approaches are: + + 1. Introduce version field for version tracking, bump up the version + whenever a new field is added to the structure in future. The version + field can be used to find out what fields are valid for the current + version of the structure. + 2. Reserve the area of predefined size (say PAGE_SIZE) for this + structure and have unused area as reserved (initialized to zero) + for future field additions. + + The advantage of approach 1 over 2 is we don't need to reserve extra space. + +Author: Mahesh Salgaonkar + +This document is based on the original documentation written for phyp + +assisted dump by Linas Vepstas and Manish Ahuja. diff --git a/Documentation/arch/powerpc/hvcs.rst b/Documentation/arch/powerpc/hvcs.rst new file mode 100644 index 0000000000..6808acde67 --- /dev/null +++ b/Documentation/arch/powerpc/hvcs.rst @@ -0,0 +1,581 @@ +=============================================================== +HVCS IBM "Hypervisor Virtual Console Server" Installation Guide +=============================================================== + +for Linux Kernel 2.6.4+ + +Copyright (C) 2004 IBM Corporation + +.. 
=========================================================================== +.. NOTE:Eight space tabs are the optimum editor setting for reading this file. +.. =========================================================================== + + +Author(s): Ryan S. Arnold + +Date Created: March, 02, 2004 +Last Changed: August, 24, 2004 + +.. Table of contents: + + 1. Driver Introduction: + 2. System Requirements + 3. Build Options: + 3.1 Built-in: + 3.2 Module: + 4. Installation: + 5. Connection: + 6. Disconnection: + 7. Configuration: + 8. Questions & Answers: + 9. Reporting Bugs: + +1. Driver Introduction: +======================= + +This is the device driver for the IBM Hypervisor Virtual Console Server, +"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user +space applications access to the system consoles of logically partitioned +operating systems (Linux and AIX) running on the same partitioned Power5 +ppc64 system. Physical hardware consoles per partition are not practical +on this hardware so system consoles are accessed by this driver using +firmware interfaces to virtual terminal devices. + +2. System Requirements: +======================= + +This device driver was written using 2.6.4 Linux kernel APIs and will only +build and run on kernels of this version or later. + +This driver was written to operate solely on IBM Power5 ppc64 hardware +though some care was taken to abstract the architecture dependent firmware +calls from the driver code. + +Sysfs must be mounted on the system so that the user can determine which +major and minor numbers are associated with each vty-server. Directions +for sysfs mounting are outside the scope of this document. + +3. Build Options: +================= + +The hvcs driver registers itself as a tty driver. The tty layer +dynamically allocates a block of major and minor numbers in a quantity +requested by the registering driver. 
The hvcs driver asks the tty layer +for 64 of these major/minor numbers by default to use for hvcs device node +entries. + +If the default number of device entries is adequate then this driver can be +built into the kernel. If not, the default can be over-ridden by inserting +the driver as a module with insmod parameters. + +3.1 Built-in: +------------- + +The following menuconfig example demonstrates selecting to build this +driver into the kernel:: + + Device Drivers ---> + Character devices ---> + <*> IBM Hypervisor Virtual Console Server Support + +Begin the kernel make process. + +3.2 Module: +----------- + +The following menuconfig example demonstrates selecting to build this +driver as a kernel module:: + + Device Drivers ---> + Character devices ---> + IBM Hypervisor Virtual Console Server Support + +The make process will build the following kernel modules: + + - hvcs.ko + - hvcserver.ko + +To insert the module with the default allocation execute the following +commands in the order they appear:: + + insmod hvcserver.ko + insmod hvcs.ko + +The hvcserver module contains architecture specific firmware calls and must +be inserted first, otherwise the hvcs module will not find some of the +symbols it expects. + +To override the default use an insmod parameter as follows (requesting 4 +tty devices as an example):: + + insmod hvcs.ko hvcs_parm_num_devs=4 + +There is a maximum number of dev entries that can be specified on insmod. +We think that 1024 is currently a decent maximum number of server adapters +to allow. This can always be changed by modifying the constant in the +source file before building. + +NOTE: The length of time it takes to insmod the driver seems to be related +to the number of tty interfaces the registering driver requests. 
+ +In order to remove the driver module execute the following command:: + + rmmod hvcs.ko + +The recommended method for installing hvcs as a module is to use depmod to +build a current modules.dep file in /lib/modules/`uname -r` and then +execute:: + + modprobe hvcs hvcs_parm_num_devs=4 + +The modules.dep file indicates that hvcserver.ko needs to be inserted +before hvcs.ko and modprobe uses this file to smartly insert the modules in +the proper order. + +The following modprobe command is used to remove hvcs and hvcserver in the +proper order:: + + modprobe -r hvcs + +4. Installation: +================ + +The tty layer creates sysfs entries which contain the major and minor +numbers allocated for the hvcs driver. The following snippet of "tree" +output of the sysfs directory shows where these numbers are presented:: + + sys/ + |-- *other sysfs base dirs* + | + |-- class + | |-- *other classes of devices* + | | + | `-- tty + | |-- *other tty devices* + | | + | |-- hvcs0 + | | `-- dev + | |-- hvcs1 + | | `-- dev + | |-- hvcs2 + | | `-- dev + | |-- hvcs3 + | | `-- dev + | | + | |-- *other tty devices* + | + |-- *other sysfs base dirs* + +For the above examples the following output is a result of cat'ing the +"dev" entry in the hvcs directory:: + + Pow5:/sys/class/tty/hvcs0/ # cat dev + 254:0 + + Pow5:/sys/class/tty/hvcs1/ # cat dev + 254:1 + + Pow5:/sys/class/tty/hvcs2/ # cat dev + 254:2 + + Pow5:/sys/class/tty/hvcs3/ # cat dev + 254:3 + +The output from reading the "dev" attribute is the char device major and +minor numbers that the tty layer has allocated for this driver's use. Most +systems running hvcs will already have the device entries created or udev +will do it automatically. 
+ +Given the example output above, to manually create a /dev/hvcs* node entry +mknod can be used as follows:: + + mknod /dev/hvcs0 c 254 0 + mknod /dev/hvcs1 c 254 1 + mknod /dev/hvcs2 c 254 2 + mknod /dev/hvcs3 c 254 3 + +Using mknod to manually create the device entries makes these device nodes +persistent. Once created they will exist prior to the driver insmod. + +Attempting to connect an application to /dev/hvcs* prior to insertion of +the hvcs module will result in an error message similar to the following:: + + "/dev/hvcs*: No such device". + +NOTE: Just because there is a device node present doesn't mean that there +is a vty-server device configured for that node. + +5. Connection +============= + +Since this driver controls devices that provide a tty interface a user can +interact with the device node entries using any standard tty-interactive +method (e.g. "cat", "dd", "echo"). The intent of this driver however, is +to provide real time console interaction with a Linux partition's console, +which requires the use of applications that provide bi-directional, +interactive I/O with a tty device. + +Applications (e.g. "minicom" and "screen") that act as terminal emulators +or perform terminal type control sequence conversion on the data being +passed through them are NOT acceptable for providing interactive console +I/O. These programs often emulate antiquated terminal types (vt100 and +ANSI) and expect inbound data to take the form of one of these supported +terminal types but they either do not convert, or do not _adequately_ +convert, outbound data into the terminal type of the terminal which invoked +them (though screen makes an attempt and can apparently be configured with +much termcap wrestling.) + +For this reason kermit and cu are two of the recommended applications for +interacting with a Linux console via an hvcs device. These programs simply +act as a conduit for data transfer to and from the tty device. 
They do not +require inbound data to take the form of a particular terminal type, nor do +they cook outbound data to a particular terminal type. + +In order to ensure proper functioning of console applications one must make +sure that once connected to a /dev/hvcs console that the console's $TERM +env variable is set to the exact terminal type of the terminal emulator +used to launch the interactive I/O application. If one is using xterm and +kermit to connect to /dev/hvcs0 when the console prompt becomes available +one should "export TERM=xterm" on the console. This tells ncurses +applications that are invoked from the console that they should output +control sequences that xterm can understand. + +As a precautionary measure an hvcs user should always "exit" from their +session before disconnecting an application such as kermit from the device +node. If this is not done, the next user to connect to the console will +continue using the previous user's logged in session which includes +using the $TERM variable that the previous user supplied. + +Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node +is used to connect to each vty-server adapter. In order to determine which +vty-server adapter is associated with which /dev/hvcs* node a special sysfs +attribute has been added to each vty-server sysfs entry. This entry is +called "index" and showing it reveals an integer that refers to the +/dev/hvcs* entry to use to connect to that device. For instance cating the +index attribute of vty-server adapter 30000004 shows the following:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index + 2 + +This index of '2' means that in order to connect to vty-server adapter +30000004 the user should interact with /dev/hvcs2. + +It should be noted that due to the system hotplug I/O capabilities of a +system the /dev/hvcs* entry that interacts with a particular vty-server +adapter is not guaranteed to remain the same across system reboots. 
Look +in the Q & A section for more on this issue. + +6. Disconnection +================ + +As a security feature to prevent the delivery of stale data to an +unintended target the Power5 system firmware disables the fetching of data +and discards that data when a connection between a vty-server and a vty has +been severed. As an example, when a vty-server is immediately disconnected +from a vty following output of data to the vty the vty adapter may not have +enough time between when it received the data interrupt and when the +connection was severed to fetch the data from firmware before the fetch is +disabled by firmware. + +When hvcs is being used to serve consoles this behavior is not a huge issue +because the adapter stays connected for large amounts of time following +almost all data writes. When hvcs is being used as a tty conduit to tunnel +data between two partitions [see Q & A below] this is a huge problem +because the standard Linux behavior when cat'ing or dd'ing data to a device +is to open the tty, send the data, and then close the tty. If this driver +manually terminated vty-server connections on tty close this would close +the vty-server and vty connection before the target vty has had a chance to +fetch the data. + +Additionally, disconnecting a vty-server and vty only on module removal or +adapter removal is impractical because other vty-servers in other +partitions may require the usage of the target vty at any time. + +Due to this behavioral restriction disconnection of vty-servers from the +connected vty is a manual procedure using a write to a sysfs attribute +outlined below, on the other hand the initial vty-server connection to a +vty is established automatically by this driver. Manual vty-server +connection is never required. + +In order to terminate the connection between a vty-server and vty the +"vterm_state" sysfs attribute within each vty-server's sysfs entry is used. 
+Reading this attribute reveals the current connection state of the +vty-server adapter. A zero means that the vty-server is not connected to a +vty. A one indicates that a connection is active. + +Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM +connection between the vty-server and target vty ONLY if the vterm_state +previously read '1'. The write directive is ignored if the vterm_state +read '0' or if any value other than '0' was written to the vterm_state +attribute. The following example will show the method used for verifying +the vty-server connection status and disconnecting a vty-server connection:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state + 1 + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state + 0 + +All vty-server connections are automatically terminated when the device is +hotplug removed and when the module is removed. + +7. Configuration +================ + +Each vty-server has a sysfs entry in the /sys/devices/vio directory, which +is symlinked in several other sysfs tree directories, notably under the +hvcs driver entry, which looks like the following example:: + + Pow5:/sys/bus/vio/drivers/hvcs # ls + . .. 30000003 30000004 rescan + +By design, firmware notifies the hvcs driver of vty-server lifetimes and +partner vty removals but not the addition of partner vtys. Since an HMC +Super Admin can add partner info dynamically we have provided the hvcs +driver sysfs directory with the "rescan" update attribute which will query +firmware and update the partner info for all the vty-servers that this +driver manages. Writing a '1' to the attribute triggers the update. An +explicit example follows: + + Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan + +Reading the attribute will indicate a state of '1' or '0'. A one indicates +that an update is in process. A zero indicates that an update has +completed or was never executed. 
+ +Vty-server entries in this directory are a 32 bit partition unique unit +address that is created by firmware. An example vty-server sysfs entry +looks like the following:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls + . current_vty devspec name partner_vtys + .. index partner_clcs vterm_state + +Each entry is provided, by default with a "name" attribute. Reading the +"name" attribute will reveal the device type as shown in the following +example:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name + vty-server + +Each entry is also provided, by default, with a "devspec" attribute which +reveals the full device specification when read, as shown in the following +example:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec + /vdevice/vty-server@30000004 + +Each vty-server sysfs dir is provided with two read-only attributes that +provide lists of easily parsed partner vty data: "partner_vtys" and +"partner_clcs":: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys + 30000000 + 30000001 + 30000002 + 30000000 + 30000000 + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs + U5112.428.103048A-V3-C0 + U5112.428.103048A-V3-C2 + U5112.428.103048A-V3-C3 + U5112.428.103048A-V4-C0 + U5112.428.103048A-V5-C0 + +Reading partner_vtys returns a list of partner vtys. Vty unit address +numbering is only per-partition-unique so entries will frequently repeat. + +Reading partner_clcs returns a list of "converged location codes" which are +composed of a system serial number followed by "-V*", where the '*' is the +target partition number, and "-C*", where the '*' is the slot of the +adapter. The first vty partner corresponds to the first clc item, the +second vty partner to the second clc item, etc. + +A vty-server can only be connected to a single vty at a time. The entry, +"current_vty" prints the clc of the currently selected partner vty when +read. 
+ +The current_vty can be changed by writing a valid partner clc to the entry +as in the following example:: + + Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304 + 8A-V4-C0 > current_vty + +Changing the current_vty when a vty-server is already connected to a vty +does not affect the current connection. The change takes effect when the +currently open connection is freed. + +Information on the "vterm_state" attribute was covered earlier on the +chapter entitled "disconnection". + +8. Questions & Answers: +======================= + +Q: What are the security concerns involving hvcs? + +A: There are three main security concerns: + + 1. The creator of the /dev/hvcs* nodes has the ability to restrict + the access of the device entries to certain users or groups. It + may be best to create a special hvcs group privilege for providing + access to system consoles. + + 2. To provide network security when grabbing the console it is + suggested that the user connect to the console hosting partition + using a secure method, such as SSH or sit at a hardware console. + + 3. Make sure to exit the user session when done with a console or + the next vty-server connection (which may be from another + partition) will experience the previously logged in session. + +--------------------------------------------------------------------------- + +Q: How do I multiplex a console that I grab through hvcs so that other +people can see it: + +A: You can use "screen" to directly connect to the /dev/hvcs* device and +setup a session on your machine with the console group privileges. As +pointed out earlier by default screen doesn't provide the termcap settings +for most terminal emulators to provide adequate character conversion from +term type "screen" to others. This means that curses based programs may +not display properly in screen sessions. + +--------------------------------------------------------------------------- + +Q: Why are the colors all messed up? 
+Q: Why are the control characters acting strange or not working? +Q: Why is the console output all strange and unintelligible? + +A: Please see the preceding section on "Connection" for a discussion of how +applications can affect the display of character control sequences. +Additionally, just because you logged into the console using and xterm +doesn't mean someone else didn't log into the console with the HMC console +(vt320) before you and leave the session logged in. The best thing to do +is to export TERM to the terminal type of your terminal emulator when you +get the console. Additionally make sure to "exit" the console before you +disconnect from the console. This will ensure that the next user gets +their own TERM type set when they login. + +--------------------------------------------------------------------------- + +Q: When I try to CONNECT kermit to an hvcs device I get: +"Sorry, can't open connection: /dev/hvcs*"What is happening? + +A: Some other Power5 console mechanism has a connection to the vty and +isn't giving it up. You can try to force disconnect the consoles from the +HMC by right clicking on the partition and then selecting "close terminal". +Otherwise you have to hunt down the people who have console authority. It +is possible that you already have the console open using another kermit +session and just forgot about it. Please review the console options for +Power5 systems to determine the many ways a system console can be held. + +OR + +A: Another user may not have a connectivity method currently attached to a +/dev/hvcs device but the vterm_state may reveal that they still have the +vty-server connection established. They need to free this using the method +outlined in the section on "Disconnection" in order for others to connect +to the target vty. + +OR + +A: The user profile you are using to execute kermit probably doesn't have +permissions to use the /dev/hvcs* device. 
+ +OR + +A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs* +entry still exists (on systems without udev). + +OR + +A: There is not a corresponding vty-server device that maps to an existing +/dev/hvcs* entry. + +--------------------------------------------------------------------------- + +Q: When I try to CONNECT kermit to an hvcs device I get: +"Sorry, write access to UUCP lockfile directory denied." + +A: The /dev/hvcs* entry you have specified doesn't exist where you said it +does? Maybe you haven't inserted the module (on systems with udev). + +--------------------------------------------------------------------------- + +Q: If I already have one Linux partition installed can I use hvcs on said +partition to provide the console for the install of a second Linux +partition? + +A: Yes granted that your are connected to the /dev/hvcs* device using +kermit or cu or some other program that doesn't provide terminal emulation. + +--------------------------------------------------------------------------- + +Q: Can I connect to more than one partition's console at a time using this +driver? + +A: Yes. Of course this means that there must be more than one vty-server +configured for this partition and each must point to a disconnected vty. + +--------------------------------------------------------------------------- + +Q: Does the hvcs driver support dynamic (hotplug) addition of devices? + +A: Yes, if you have dlpar and hotplug enabled for your system and it has +been built into the kernel the hvcs drivers is configured to dynamically +handle additions of new devices and removals of unused devices. + +--------------------------------------------------------------------------- + +Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter +after a reboot. What happened? + +A: Assignment of vty-server adapters to /dev/hvcs* entries is always done +in the order that the adapters are exposed. 
Due to hotplug capabilities of +this driver assignment of hotplug added vty-servers may be in a different +order than how they would be exposed on module load. Rebooting or +reloading the module after dynamic addition may result in the /dev/hvcs* +and vty-server coupling changing if a vty-server adapter was added in a +slot between two other vty-server adapters. Refer to the section above +on how to determine which vty-server goes with which /dev/hvcs* node. +Hint; look at the sysfs "index" attribute for the vty-server. + +--------------------------------------------------------------------------- + +Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty +device on that partition as the other end of the pipe? + +A: Yes, on Power5 platforms the hvc_console driver provides a tty interface +for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console). +In order to get a tty conduit working between the two partitions the HMC +Super Admin must create an additional "serial server" for the target +partition with the HMC gui which will show up as /dev/hvc* when the target +partition is rebooted. + +The HMC Super Admin then creates an additional "serial client" for the +current partition and points this at the target partition's newly created +"serial server" adapter (remember the slot). This shows up as an +additional /dev/hvcs* device. + +Now a program on the target system can be configured to read or write to +/dev/hvc* and another program on the current partition can be configured to +read or write to /dev/hvcs*. Now you have a tty conduit between two +partitions. + +--------------------------------------------------------------------------- + +9. 
Reporting Bugs: +================== + +The proper channel for reporting bugs is either through the Linux OS +distribution company that provided your OS or by posting issues to the +PowerPC development mailing list at: + +linuxppc-dev@lists.ozlabs.org + +This request is to provide a documented and searchable public exchange +of the problems and solutions surrounding this driver for the benefit of +all users. diff --git a/Documentation/arch/powerpc/imc.rst b/Documentation/arch/powerpc/imc.rst new file mode 100644 index 0000000000..633bcee7dc --- /dev/null +++ b/Documentation/arch/powerpc/imc.rst @@ -0,0 +1,199 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _imc: + +=================================== +IMC (In-Memory Collection Counters) +=================================== + +Anju T Sudhakar, 10 May 2019 + +.. contents:: + :depth: 3 + + +Basic overview +============== + +IMC (In-Memory collection counters) is a hardware monitoring facility that +collects large numbers of hardware performance events at Nest level (these are +on-chip but off-core), Core level and Thread level. + +The Nest PMU counters are handled by a Nest IMC microcode which runs in the OCC +(On-Chip Controller) complex. The microcode collects the counter data and moves +the nest IMC counter data to memory. + +The Core and Thread IMC PMU counters are handled in the core. Core level PMU +counters give us the IMC counters' data per core and thread level PMU counters +give us the IMC counters' data per CPU thread. + +OPAL obtains the IMC PMU and supported events information from the IMC Catalog +and passes on to the kernel via the device tree. The event's information +contains: + +- Event name +- Event Offset +- Event description + +and possibly also: + +- Event scale +- Event unit + +Some PMUs may have a common scale and unit values for all their supported +events. For those cases, the scale and unit properties for those events must be +inherited from the PMU. 
+ +The event offset in the memory is where the counter data gets accumulated. + +IMC catalog is available at: + https://github.com/open-power/ima-catalog + +The kernel discovers the IMC counters information in the device tree at the +`imc-counters` device node which has a compatible field +`ibm,opal-in-memory-counters`. From the device tree, the kernel parses the PMUs +and their event's information and register the PMU and its attributes in the +kernel. + +IMC example usage +================= + +.. code-block:: sh + + # perf list + [...] + nest_mcs01/PM_MCS01_64B_RD_DISP_PORT01/ [Kernel PMU event] + nest_mcs01/PM_MCS01_64B_RD_DISP_PORT23/ [Kernel PMU event] + [...] + core_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] + core_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] + [...] + thread_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] + thread_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] + +To see per chip data for nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/: + +.. code-block:: sh + + # ./perf stat -e "nest_mcs01/PM_MCS01_64B_WR_DISP_PORT01/" -a --per-socket + +To see non-idle instructions for core 0: + +.. code-block:: sh + + # ./perf stat -e "core_imc/CPM_NON_IDLE_INST/" -C 0 -I 1000 + +To see non-idle instructions for a "make": + +.. code-block:: sh + + # ./perf stat -e "thread_imc/CPM_NON_IDLE_PCYC/" make + + +IMC Trace-mode +=============== + +POWER9 supports two modes for IMC which are the Accumulation mode and Trace +mode. In Accumulation mode, event counts are accumulated in system Memory. +Hypervisor then reads the posted counts periodically or when requested. In IMC +Trace mode, the 64 bit trace SCOM value is initialized with the event +information. The CPMCxSEL and CPMC_LOAD in the trace SCOM, specifies the event +to be monitored and the sampling duration. On each overflow in the CPMCxSEL, +hardware snapshots the program counter along with event counts and writes into +memory pointed by LDBAR. 
+ +LDBAR is a 64 bit special purpose per thread register, it has bits to indicate +whether hardware is configured for accumulation or trace mode. + +LDBAR Register Layout +--------------------- + + +-------+----------------------+ + | 0 | Enable/Disable | + +-------+----------------------+ + | 1 | 0: Accumulation Mode | + | +----------------------+ + | | 1: Trace Mode | + +-------+----------------------+ + | 2:3 | Reserved | + +-------+----------------------+ + | 4-6 | PB scope | + +-------+----------------------+ + | 7 | Reserved | + +-------+----------------------+ + | 8:50 | Counter Address | + +-------+----------------------+ + | 51:63 | Reserved | + +-------+----------------------+ + +TRACE_IMC_SCOM bit representation +--------------------------------- + + +-------+------------+ + | 0:1 | SAMPSEL | + +-------+------------+ + | 2:33 | CPMC_LOAD | + +-------+------------+ + | 34:40 | CPMC1SEL | + +-------+------------+ + | 41:47 | CPMC2SEL | + +-------+------------+ + | 48:50 | BUFFERSIZE | + +-------+------------+ + | 51:63 | RESERVED | + +-------+------------+ + +CPMC_LOAD contains the sampling duration. SAMPSEL and CPMCxSEL determines the +event to count. BUFFERSIZE indicates the memory range. On each overflow, +hardware snapshots the program counter along with event counts and updates the +memory and reloads the CMPC_LOAD value for the next sampling duration. IMC +hardware does not support exceptions, so it quietly wraps around if memory +buffer reaches the end. + +*Currently the event monitored for trace-mode is fixed as cycle.* + +Trace IMC example usage +======================= + +.. code-block:: sh + + # perf list + [....] + trace_imc/trace_cycles/ [Kernel PMU event] + +To record an application/process with trace-imc event: + +.. 
code-block:: sh + + # perf record -e trace_imc/trace_cycles/ yes > /dev/null + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.012 MB perf.data (21 samples) ] + +The `perf.data` generated, can be read using perf report. + +Benefits of using IMC trace-mode +================================ + +PMI (Performance Monitoring Interrupts) interrupt handling is avoided, since IMC +trace mode snapshots the program counter and updates to the memory. And this +also provide a way for the operating system to do instruction sampling in real +time without PMI processing overhead. + +Performance data using `perf top` with and without trace-imc event. + +PMI interrupts count when `perf top` command is executed without trace-imc event. + +.. code-block:: sh + + # grep PMI /proc/interrupts + PMI: 0 0 0 0 Performance monitoring interrupts + # ./perf top + ... + # grep PMI /proc/interrupts + PMI: 39735 8710 17338 17801 Performance monitoring interrupts + # ./perf top -e trace_imc/trace_cycles/ + ... + # grep PMI /proc/interrupts + PMI: 39735 8710 17338 17801 Performance monitoring interrupts + + +That is, the PMI interrupt counts do not increment when using the `trace_imc` event. diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst new file mode 100644 index 0000000000..9749f6dc25 --- /dev/null +++ b/Documentation/arch/powerpc/index.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======= +powerpc +======= + +.. toctree:: + :maxdepth: 1 + + associativity + booting + bootwrapper + cpu_families + cpu_features + cxl + cxlflash + dawr-power9 + dexcr + dscr + eeh-pci-error-recovery + elf_hwcaps + elfnote + firmware-assisted-dump + hvcs + imc + isa-versions + kaslr-booke32 + mpc52xx + kvm-nested + papr_hcalls + pci_iov_resource_on_powernv + pmu-ebb + ptrace + qe_firmware + syscall64-abi + transactional_memory + ultravisor + vas-api + vcpudispatch_stats + vmemmap_dedup + + features + +.. 
only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/arch/powerpc/isa-versions.rst b/Documentation/arch/powerpc/isa-versions.rst new file mode 100644 index 0000000000..a8d6b6028b --- /dev/null +++ b/Documentation/arch/powerpc/isa-versions.rst @@ -0,0 +1,101 @@ +========================== +CPU to ISA Version Mapping +========================== + +Mapping of some CPU versions to relevant ISA versions. + +Note Power4 and Power4+ are not supported. + +========= ==================================================================== +CPU Architecture version +========= ==================================================================== +Power10 Power ISA v3.1 +Power9 Power ISA v3.0B +Power8 Power ISA v2.07 +e6500 Power ISA v2.06 with some exceptions +e5500 Power ISA v2.06 with some exceptions, no Altivec +Power7 Power ISA v2.06 +Power6 Power ISA v2.05 +PA6T Power ISA v2.04 +Cell PPU - Power ISA v2.02 with some minor exceptions + - Plus Altivec/VMX ~= 2.03 +Power5++ Power ISA v2.04 (no VMX) +Power5+ Power ISA v2.03 +Power5 - PowerPC User Instruction Set Architecture Book I v2.02 + - PowerPC Virtual Environment Architecture Book II v2.02 + - PowerPC Operating Environment Architecture Book III v2.02 +PPC970 - PowerPC User Instruction Set Architecture Book I v2.01 + - PowerPC Virtual Environment Architecture Book II v2.01 + - PowerPC Operating Environment Architecture Book III v2.01 + - Plus Altivec/VMX ~= 2.03 +Power4+ - PowerPC User Instruction Set Architecture Book I v2.01 + - PowerPC Virtual Environment Architecture Book II v2.01 + - PowerPC Operating Environment Architecture Book III v2.01 +Power4 - PowerPC User Instruction Set Architecture Book I v2.00 + - PowerPC Virtual Environment Architecture Book II v2.00 + - PowerPC Operating Environment Architecture Book III v2.00 +========= ==================================================================== + + +Key Features +------------ + +========== ================== +CPU VMX 
(aka. Altivec) +========== ================== +Power10 Yes +Power9 Yes +Power8 Yes +e6500 Yes +e5500 No +Power7 Yes +Power6 Yes +PA6T Yes +Cell PPU Yes +Power5++ No +Power5+ No +Power5 No +PPC970 Yes +Power4+ No +Power4 No +========== ================== + +========== ==== +CPU VSX +========== ==== +Power10 Yes +Power9 Yes +Power8 Yes +e6500 No +e5500 No +Power7 Yes +Power6 No +PA6T No +Cell PPU No +Power5++ No +Power5+ No +Power5 No +PPC970 No +Power4+ No +Power4 No +========== ==== + +========== ==================================== +CPU Transactional Memory +========== ==================================== +Power10 No (* see Power ISA v3.1, "Appendix A. Notes on the Removal of Transactional Memory from the Architecture") +Power9 Yes (* see transactional_memory.txt) +Power8 Yes +e6500 No +e5500 No +Power7 No +Power6 No +PA6T No +Cell PPU No +Power5++ No +Power5+ No +Power5 No +PPC970 No +Power4+ No +Power4 No +========== ==================================== diff --git a/Documentation/arch/powerpc/kasan.txt b/Documentation/arch/powerpc/kasan.txt new file mode 100644 index 0000000000..a4f647e4ff --- /dev/null +++ b/Documentation/arch/powerpc/kasan.txt @@ -0,0 +1,58 @@ +KASAN is supported on powerpc on 32-bit and Radix 64-bit only. + +32 bit support +============== + +KASAN is supported on both hash and nohash MMUs on 32-bit. + +The shadow area sits at the top of the kernel virtual memory space above the +fixmap area and occupies one eighth of the total kernel virtual memory space. + +Instrumentation of the vmalloc area is optional, unless built with modules, +in which case it is required. + +64 bit support +============== + +Currently, only the radix MMU is supported. There have been versions for hash +and Book3E processors floating around on the mailing list, but nothing has been +merged. 
+ +KASAN support on Book3S is a bit tricky to get right: + + - It would be good to support inline instrumentation so as to be able to catch + stack issues that cannot be caught with outline mode. + + - Inline instrumentation requires a fixed offset. + + - Book3S runs code with translations off ("real mode") during boot, including a + lot of generic device-tree parsing code which is used to determine MMU + features. + + - Some code - most notably a lot of KVM code - also runs with translations off + after boot. + + - Therefore any offset has to point to memory that is valid with + translations on or off. + +One approach is just to give up on inline instrumentation. This way boot-time +checks can be delayed until after the MMU is set is up, and we can just not +instrument any code that runs with translations off after booting. This is the +current approach. + +To avoid this limitation, the KASAN shadow would have to be placed inside the +linear mapping, using the same high-bits trick we use for the rest of the linear +mapping. This is tricky: + + - We'd like to place it near the start of physical memory. In theory we can do + this at run-time based on how much physical memory we have, but this requires + being able to arbitrarily relocate the kernel, which is basically the tricky + part of KASLR. Not being game to implement both tricky things at once, this + is hopefully something we can revisit once we get KASLR for Book3S. + + - Alternatively, we can place the shadow at the _end_ of memory, but this + requires knowing how much contiguous physical memory a system has _at compile + time_. This is a big hammer, and has some unfortunate consequences: inablity + to handle discontiguous physical memory, total failure to boot on machines + with less memory than specified, and that machines with more memory than + specified can't use it. This was deemed unacceptable. 
diff --git a/Documentation/arch/powerpc/kaslr-booke32.rst b/Documentation/arch/powerpc/kaslr-booke32.rst new file mode 100644 index 0000000000..5681c1d1b6 --- /dev/null +++ b/Documentation/arch/powerpc/kaslr-booke32.rst @@ -0,0 +1,42 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +KASLR for Freescale BookE32 +=========================== + +The word KASLR stands for Kernel Address Space Layout Randomization. + +This document tries to explain the implementation of the KASLR for +Freescale BookE32. KASLR is a security feature that deters exploit +attempts relying on knowledge of the location of kernel internals. + +Since CONFIG_RELOCATABLE has already supported, what we need to do is +map or copy kernel to a proper place and relocate. Freescale Book-E +parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1 +entries are not suitable to map the kernel directly in a randomized +region, so we chose to copy the kernel to a proper place and restart to +relocate. + +Entropy is derived from the banner and timer base, which will change every +build and boot. This not so much safe so additionally the bootloader may +pass entropy via the /chosen/kaslr-seed node in device tree. + +We will use the first 512M of the low memory to randomize the kernel +image. The memory will be split in 64M zones. We will use the lower 8 +bit of the entropy to decide the index of the 64M zone. Then we chose a +16K aligned offset inside the 64M zone to put the kernel in:: + + KERNELBASE + + |--> 64M <--| + | | + +---------------+ +----------------+---------------+ + | |....| |kernel| | | + +---------------+ +----------------+---------------+ + | | + |-----> offset <-----| + + kernstart_virt_addr + +To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enabled and you +want to disable it at runtime, add "nokaslr" to the kernel cmdline. 
diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst new file mode 100644 index 0000000000..630602a8aa --- /dev/null +++ b/Documentation/arch/powerpc/kvm-nested.rst @@ -0,0 +1,634 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================================== +Nested KVM on POWER +==================================== + +Introduction +============ + +This document explains how a guest operating system can act as a +hypervisor and run nested guests through the use of hypercalls, if the +hypervisor has implemented them. The terms L0, L1, and L2 are used to +refer to different software entities. L0 is the hypervisor mode entity +that would normally be called the "host" or "hypervisor". L1 is a +guest virtual machine that is directly run under L0 and is initiated +and controlled by L0. L2 is a guest virtual machine that is initiated +and controlled by L1 acting as a hypervisor. + +Existing API +============ + +Linux/KVM has had support for Nesting as an L0 or L1 since 2018 + +The L0 code was added:: + + commit 8e3f5fc1045dc49fd175b978c5457f5f51e7a2ce + Author: Paul Mackerras + Date: Mon Oct 8 16:31:03 2018 +1100 + KVM: PPC: Book3S HV: Framework and hcall stubs for nested virtualization + +The L1 code was added:: + + commit 360cae313702cdd0b90f82c261a8302fecef030a + Author: Paul Mackerras + Date: Mon Oct 8 16:31:04 2018 +1100 + KVM: PPC: Book3S HV: Nested guest entry via hypercall + +This API works primarily using a single hcall h_enter_nested(). This +call made by the L1 to tell the L0 to start an L2 vCPU with the given +state. The L0 then starts this L2 and runs until an L2 exit condition +is reached. Once the L2 exits, the state of the L2 is given back to +the L1 by the L0. The full L2 vCPU state is always transferred from +and to L1 when the L2 is run. The L0 doesn't keep any state on the L2 +vCPU (except in the short sequence in the L0 on L1 -> L2 entry and L2 +-> L1 exit). 
+ +The only state kept by the L0 is the partition table. The L1 registers +it's partition table using the h_set_partition_table() hcall. All +other state held by the L0 about the L2s is cached state (such as +shadow page tables). + +The L1 may run any L2 or vCPU without first informing the L0. It +simply starts the vCPU using h_enter_nested(). The creation of L2s and +vCPUs is done implicitly whenever h_enter_nested() is called. + +In this document, we call this existing API the v1 API. + +New PAPR API +=============== + +The new PAPR API changes from the v1 API such that the creating L2 and +associated vCPUs is explicit. In this document, we call this the v2 +API. + +h_enter_nested() is replaced with H_GUEST_VCPU_RUN(). Before this can +be called the L1 must explicitly create the L2 using h_guest_create() +and any associated vCPUs() created with h_guest_create_vCPU(). Getting +and setting vCPU state can also be performed using h_guest_{g|s}et +hcall. + +The basic execution flow is for an L1 to create an L2, run it, and +delete it is: + +- L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES() + (normally at L1 boot time). + +- L1 requests the L0 create an L2 with H_GUEST_CREATE() and receives a token + +- L1 requests the L0 create an L2 vCPU with H_GUEST_CREATE_VCPU() + +- L1 and L0 communicate the vCPU state using the H_GUEST_{G,S}ET() hcall + +- L1 requests the L0 runs the vCPU running H_GUEST_VCPU_RUN() hcall + +- L1 deletes L2 with H_GUEST_DELETE() + +More details of the individual hcalls follows: + +HCALL Details +============= + +This documentation is provided to give an overall understating of the +API. It doesn't aim to provide all the details required to implement +an L1 or L0. Latest version of PAPR can be referred to for more details. + +All these HCALLs are made by the L1 to the L0. + +H_GUEST_GET_CAPABILITIES() +-------------------------- + +This is called to get the capabilities of the L0 nested +hypervisor. 
This includes capabilities such as the CPU versions (e.g. +POWER9, POWER10) that are supported as L2s::
This vCPUid is allocated by the +L1:: + + H_GUEST_CREATE_VCPU(uint64 flags, + uint64 guestId, + uint64 vcpuId); + Parameters: + Input: + flags: Reserved + guestId: ID obtained from H_GUEST_CREATE + vcpuId: ID of the vCPU to be created. This must be within the + range of 0 to 2047 + Output: + R3: Return code. Notable: + H_Not_Enough_Resources: Unable to create Guest VCPU due to not + enough Hypervisor memory. See H_GUEST_CREATE_GET_STATE(flags = + takeOwnershipOfVcpuState) + +H_GUEST_GET_STATE() +------------------- + +This is called to get state associated with an L2 (Guest-wide or vCPU specific). +This info is passed via the Guest State Buffer (GSB), a standard format as +explained later in this doc, necessary details below: + +This can get either L2 wide or vcpu specific information. Examples of +L2 wide is the timebase offset or process scoped page table +info. Examples of vCPU specific are GPRs or VSRs. A bit in the flags +parameter specifies if this call is L2 wide or vCPU specific and the +IDs in the GSB must match this. + +The L1 provides a pointer to the GSB as a parameter to this call. Also +provided is the L2 and vCPU IDs associated with the state to set. + +The L1 writes only the IDs and sizes in the GSB. L0 writes the +associated values for each ID in the GSB:: + + H_GUEST_GET_STATE(uint64 flags, + uint64 guestId, + uint64 vcpuId, + uint64 dataBuffer, + uint64 dataBufferSizeInBytes); + Parameters: + Input: + flags: + Bit 0: getGuestWideState: Request state of the Guest instead + of an individual VCPU. + Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking + over ownership of the VCPU state and that the L0 can free + the storage holding the state. The VCPU state will need to + be returned to the Hypervisor via H_GUEST_SET_STATE prior + to H_GUEST_RUN_VCPU being called for this VCPU. The data + returned in the dataBuffer is in a Hypervisor internal + format. 
+ Bits 2-63: Reserved + guestId: ID obtained from H_GUEST_CREATE + vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU + dataBuffer: A L1 real address of the GSB. + If takeOwnershipOfVcpuState, size must be at least the size + returned by ID=0x0001 + dataBufferSizeInBytes: Size of dataBuffer + Output: + R3: Return code + R4: If R3 = H_Invalid_Element_Id: The array index of the bad + element ID. + If R3 = H_Invalid_Element_Size: The array index of the bad + element size. + If R3 = H_Invalid_Element_Value: The array index of the bad + element value. + +H_GUEST_SET_STATE() +------------------- + +This is called to set L2 wide or vCPU specific L2 state. This info is +passed via the Guest State Buffer (GSB), necessary details below: + +This can set either L2 wide or vcpu specific information. Examples of +L2 wide is the timebase offset or process scoped page table +info. Examples of vCPU specific are GPRs or VSRs. A bit in the flags +parameter specifies if this call is L2 wide or vCPU specific and the +IDs in the GSB must match this. + +The L1 provides a pointer to the GSB as a parameter to this call. Also +provided is the L2 and vCPU IDs associated with the state to set. + +The L1 writes all values in the GSB and the L0 only reads the GSB for +this call:: + + H_GUEST_SET_STATE(uint64 flags, + uint64 guestId, + uint64 vcpuId, + uint64 dataBuffer, + uint64 dataBufferSizeInBytes); + Parameters: + Input: + flags: + Bit 0: getGuestWideState: Request state of the Guest instead + of an individual VCPU. + Bit 1: returnOwnershipOfVcpuState Return Guest VCPU state. See + GET_STATE takeOwnershipOfVcpuState + Bits 2-63: Reserved + guestId: ID obtained from H_GUEST_CREATE + vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU + dataBuffer: A L1 real address of the GSB. 
+ If takeOwnershipOfVcpuState, size must be at least the size + returned by ID=0x0001 + dataBufferSizeInBytes: Size of dataBuffer + Output: + R3: Return code + R4: If R3 = H_Invalid_Element_Id: The array index of the bad + element ID. + If R3 = H_Invalid_Element_Size: The array index of the bad + element size. + If R3 = H_Invalid_Element_Value: The array index of the bad + element value. + +H_GUEST_RUN_VCPU() +------------------ + +This is called to run an L2 vCPU. The L2 and vCPU IDs are passed in as +parameters. The vCPU runs with the state set previously using +H_GUEST_SET_STATE(). When the L2 exits, the L1 will resume from this +hcall. + +This hcall also has associated input and output GSBs. Unlike +H_GUEST_{S,G}ET_STATE(), these GSB pointers are not passed in as +parameters to the hcall (This was done in the interest of +performance). The locations of these GSBs must be preregistered using +the H_GUEST_SET_STATE() call with ID 0x0c00 and 0x0c01 (see table +below). + +The input GSB may contain only VCPU specific elements to be set. This +GSB may also contain zero elements (ie 0 in the first 4 bytes of the +GSB) if nothing needs to be set. + +On exit from the hcall, the output buffer is filled with elements +determined by the L0. The reason for the exit is contained in GPR4 (ie +NIP is put in GPR4). The elements returned depend on the exit +type. For example, if the exit reason is the L2 doing a hcall (GPR4 = +0xc00), then GPR3-12 are provided in the output GSB as this is the +state likely needed to service the hcall. If additional state is +needed, H_GUEST_GET_STATE() may be called by the L1. + +To synthesize interrupts in the L2, when calling H_GUEST_RUN_VCPU() +the L1 may set a flag (as a hcall parameter) and the L0 will +synthesize the interrupt in the L2. 
Alternatively, the L1 may +synthesize the interrupt itself using H_GUEST_SET_STATE() or the +H_GUEST_RUN_VCPU() input GSB to set the state appropriately:: + + H_GUEST_RUN_VCPU(uint64 flags, + uint64 guestId, + uint64 vcpuId, + uint64 dataBuffer, + uint64 dataBufferSizeInBytes); + Parameters: + Input: + flags: + Bit 0: generateExternalInterrupt: Generate an external interrupt + Bit 1: generatePrivilegedDoorbell: Generate a Privileged Doorbell + Bit 2: sendToSystemReset: Generate a System Reset Interrupt + Bits 3-63: Reserved + guestId: ID obtained from H_GUEST_CREATE + vcpuId: ID of the vCPU passed to H_GUEST_CREATE_VCPU + Output: + R3: Return code + R4: If R3 = H_Success: The reason L2 VCPU exited (ie. NIA) + 0x000: The VCPU stopped running for an unspecified reason. An + example of this is the Hypervisor stopping a VCPU running + due to an outstanding interrupt for the Host Partition. + 0x980: HDEC + 0xC00: HCALL + 0xE00: HDSI + 0xE20: HISI + 0xE40: HEA + 0xF80: HV Fac Unavail + If R3 = H_Invalid_Element_Id, H_Invalid_Element_Size, or + H_Invalid_Element_Value: R4 is offset of the invalid element + in the input buffer. + +H_GUEST_DELETE() +---------------- + +This is called to delete an L2. All associated vCPUs are also +deleted. No specific vCPU delete call is provided. + +A flag may be provided to delete all guests. This is used to reset the +L0 in the case of kdump/kexec:: + + H_GUEST_DELETE(uint64 flags, + uint64 guestId) + Parameters: + Input: + flags: + Bit 0: deleteAllGuests: deletes all guests + Bits 1-63: Reserved + guestId: ID obtained from H_GUEST_CREATE + Output: + R3: Return code + +Guest State Buffer +================== + +The Guest State Buffer (GSB) is the main method of communicating state +about the L2 between the L1 and L0 via H_GUEST_{G,S}ET() and +H_GUEST_RUN_VCPU() calls. + +State may be associated with a whole L2 (eg timebase offset) or a +specific L2 vCPU (eg. GPR state). Only L2 VCPU state may be set by +H_GUEST_RUN_VCPU().
+ +All data in the GSB is big endian (as is standard in PAPR). + +The Guest state buffer has a header which gives the number of +elements, followed by the GSB elements themselves. + +GSB header: + ++----------+----------+-------------------------------------------+ +| Offset | Size | Purpose | +| Bytes | Bytes | | ++==========+==========+===========================================+ +| 0 | 4 | Number of elements | ++----------+----------+-------------------------------------------+ +| 4 | | Guest state buffer elements | ++----------+----------+-------------------------------------------+ + +GSB element: + ++----------+----------+-------------------------------------------+ +| Offset | Size | Purpose | +| Bytes | Bytes | | ++==========+==========+===========================================+ +| 0 | 2 | ID | ++----------+----------+-------------------------------------------+ +| 2 | 2 | Size of Value | ++----------+----------+-------------------------------------------+ +| 4 | As above | Value | ++----------+----------+-------------------------------------------+ + +The ID in the GSB element specifies what is to be set. This includes +architected state like GPRs, VSRs, SPRs, plus also some meta data about +the partition like the timebase offset and partition scoped page +table information. + ++--------+-------+----+--------+----------------------------------+ +| ID | Size | RW | Thread | Details | +| | Bytes | | Guest | | +| | | | Scope | | ++========+=======+====+========+==================================+ +| 0x0000 | | RW | TG | NOP element | ++--------+-------+----+--------+----------------------------------+ +| 0x0001 | 0x08 | R | G | Size of L0 vCPU state.
See: | +| | | | | H_GUEST_GET_STATE: | +| | | | | flags = takeOwnershipOfVcpuState | ++--------+-------+----+--------+----------------------------------+ +| 0x0002 | 0x08 | R | G | Size Run vCPU out buffer | ++--------+-------+----+--------+----------------------------------+ +| 0x0003 | 0x04 | RW | G | Logical PVR | ++--------+-------+----+--------+----------------------------------+ +| 0x0004 | 0x08 | RW | G | TB Offset (L1 relative) | ++--------+-------+----+--------+----------------------------------+ +| 0x0005 | 0x18 | RW | G |Partition scoped page tbl info: | +| | | | | | +| | | | |- 0x00 Addr part scope table | +| | | | |- 0x08 Num addr bits | +| | | | |- 0x10 Size root dir | ++--------+-------+----+--------+----------------------------------+ +| 0x0006 | 0x10 | RW | G |Process Table Information: | +| | | | | | +| | | | |- 0x0 Addr proc scope table | +| | | | |- 0x8 Table size. | ++--------+-------+----+--------+----------------------------------+ +| 0x0007-| | | | Reserved | +| 0x0BFF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: | +| | | | | | +| | | | |- 0x0 Addr of buffer | +| | | | |- 0x8 Buffer Size. | ++--------+-------+----+--------+----------------------------------+ +| 0x0C01 | 0x10 | RW | T |Run vCPU Output Buffer: | +| | | | | | +| | | | |- 0x0 Addr of buffer | +| | | | |- 0x8 Buffer Size. 
| ++--------+-------+----+--------+----------------------------------+ +| 0x0C02 | 0x08 | RW | T | vCPU VPA Address | ++--------+-------+----+--------+----------------------------------+ +| 0x0C03-| | | | Reserved | +| 0x0FFF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x1000-| 0x08 | RW | T | GPR 0-31 | +| 0x101F | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x1020 | 0x08 | T | T | HDEC expiry TB | ++--------+-------+----+--------+----------------------------------+ +| 0x1021 | 0x08 | RW | T | NIA | ++--------+-------+----+--------+----------------------------------+ +| 0x1022 | 0x08 | RW | T | MSR | ++--------+-------+----+--------+----------------------------------+ +| 0x1023 | 0x08 | RW | T | LR | ++--------+-------+----+--------+----------------------------------+ +| 0x1024 | 0x08 | RW | T | XER | ++--------+-------+----+--------+----------------------------------+ +| 0x1025 | 0x08 | RW | T | CTR | ++--------+-------+----+--------+----------------------------------+ +| 0x1026 | 0x08 | RW | T | CFAR | ++--------+-------+----+--------+----------------------------------+ +| 0x1027 | 0x08 | RW | T | SRR0 | ++--------+-------+----+--------+----------------------------------+ +| 0x1028 | 0x08 | RW | T | SRR1 | ++--------+-------+----+--------+----------------------------------+ +| 0x1029 | 0x08 | RW | T | DAR | ++--------+-------+----+--------+----------------------------------+ +| 0x102A | 0x08 | RW | T | DEC expiry TB | ++--------+-------+----+--------+----------------------------------+ +| 0x102B | 0x08 | RW | T | VTB | ++--------+-------+----+--------+----------------------------------+ +| 0x102C | 0x08 | RW | T | LPCR | ++--------+-------+----+--------+----------------------------------+ +| 0x102D | 0x08 | RW | T | HFSCR | ++--------+-------+----+--------+----------------------------------+ +| 0x102E | 0x08 | RW | T | FSCR | 
++--------+-------+----+--------+----------------------------------+ +| 0x102F | 0x08 | RW | T | FPSCR | ++--------+-------+----+--------+----------------------------------+ +| 0x1030 | 0x08 | RW | T | DAWR0 | ++--------+-------+----+--------+----------------------------------+ +| 0x1031 | 0x08 | RW | T | DAWR1 | ++--------+-------+----+--------+----------------------------------+ +| 0x1032 | 0x08 | RW | T | CIABR | ++--------+-------+----+--------+----------------------------------+ +| 0x1033 | 0x08 | RW | T | PURR | ++--------+-------+----+--------+----------------------------------+ +| 0x1034 | 0x08 | RW | T | SPURR | ++--------+-------+----+--------+----------------------------------+ +| 0x1035 | 0x08 | RW | T | IC | ++--------+-------+----+--------+----------------------------------+ +| 0x1036-| 0x08 | RW | T | SPRG 0-3 | +| 0x1039 | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x103A | 0x08 | W | T | PPR | ++--------+-------+----+--------+----------------------------------+ +| 0x103B | 0x08 | RW | T | MMCR 0-3 | +| 0x103E | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x103F | 0x08 | RW | T | MMCRA | ++--------+-------+----+--------+----------------------------------+ +| 0x1040 | 0x08 | RW | T | SIER | ++--------+-------+----+--------+----------------------------------+ +| 0x1041 | 0x08 | RW | T | SIER 2 | ++--------+-------+----+--------+----------------------------------+ +| 0x1042 | 0x08 | RW | T | SIER 3 | ++--------+-------+----+--------+----------------------------------+ +| 0x1043 | 0x08 | RW | T | BESCR | ++--------+-------+----+--------+----------------------------------+ +| 0x1044 | 0x08 | RW | T | EBBHR | ++--------+-------+----+--------+----------------------------------+ +| 0x1045 | 0x08 | RW | T | EBBRR | ++--------+-------+----+--------+----------------------------------+ +| 0x1046 | 0x08 | RW | T | AMR | ++--------+-------+----+--------+----------------------------------+ +| 
0x1047 | 0x08 | RW | T | IAMR | ++--------+-------+----+--------+----------------------------------+ +| 0x1048 | 0x08 | RW | T | AMOR | ++--------+-------+----+--------+----------------------------------+ +| 0x1049 | 0x08 | RW | T | UAMOR | ++--------+-------+----+--------+----------------------------------+ +| 0x104A | 0x08 | RW | T | SDAR | ++--------+-------+----+--------+----------------------------------+ +| 0x104B | 0x08 | RW | T | SIAR | ++--------+-------+----+--------+----------------------------------+ +| 0x104C | 0x08 | RW | T | DSCR | ++--------+-------+----+--------+----------------------------------+ +| 0x104D | 0x08 | RW | T | TAR | ++--------+-------+----+--------+----------------------------------+ +| 0x104E | 0x08 | RW | T | DEXCR | ++--------+-------+----+--------+----------------------------------+ +| 0x104F | 0x08 | RW | T | HDEXCR | ++--------+-------+----+--------+----------------------------------+ +| 0x1050 | 0x08 | RW | T | HASHKEYR | ++--------+-------+----+--------+----------------------------------+ +| 0x1051 | 0x08 | RW | T | HASHPKEYR | ++--------+-------+----+--------+----------------------------------+ +| 0x1052 | 0x08 | RW | T | CTRL | ++--------+-------+----+--------+----------------------------------+ +| 0x1053-| | | | Reserved | +| 0x1FFF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x2000 | 0x04 | RW | T | CR | ++--------+-------+----+--------+----------------------------------+ +| 0x2001 | 0x04 | RW | T | PIDR | ++--------+-------+----+--------+----------------------------------+ +| 0x2002 | 0x04 | RW | T | DSISR | ++--------+-------+----+--------+----------------------------------+ +| 0x2003 | 0x04 | RW | T | VSCR | ++--------+-------+----+--------+----------------------------------+ +| 0x2004 | 0x04 | RW | T | VRSAVE | ++--------+-------+----+--------+----------------------------------+ +| 0x2005 | 0x04 | RW | T | DAWRX0 | 
++--------+-------+----+--------+----------------------------------+ +| 0x2006 | 0x04 | RW | T | DAWRX1 | ++--------+-------+----+--------+----------------------------------+ +| 0x2007-| 0x04 | RW | T | PMC 1-6 | +| 0x200c | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x200D | 0x04 | RW | T | WORT | ++--------+-------+----+--------+----------------------------------+ +| 0x200E | 0x04 | RW | T | PSPB | ++--------+-------+----+--------+----------------------------------+ +| 0x200F-| | | | Reserved | +| 0x2FFF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x3000-| 0x10 | RW | T | VSR 0-63 | +| 0x303F | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x3040-| | | | Reserved | +| 0xEFFF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0xF000 | 0x08 | R | T | HDAR | ++--------+-------+----+--------+----------------------------------+ +| 0xF001 | 0x04 | R | T | HDSISR | ++--------+-------+----+--------+----------------------------------+ +| 0xF002 | 0x04 | R | T | HEIR | ++--------+-------+----+--------+----------------------------------+ +| 0xF003 | 0x08 | R | T | ASDR | ++--------+-------+----+--------+----------------------------------+ + + +Miscellaneous info +================== + +State not in ptregs/hvregs +-------------------------- + +In the v1 API, some state is not in the ptregs/hvstate. This includes +the vector register and some SPRs. For the L1 to set this state for +the L2, the L1 loads up these hardware registers before the +h_enter_nested() call and the L0 ensures they end up as the L2 state +(by not touching them). + +The v2 API removes this and explicitly sets this state via the GSB. + +L1 Implementation details: Caching state +---------------------------------------- + +In the v1 API, all state is sent from the L1 to the L0 and vice versa +on every h_enter_nested() hcall. 
If the L0 is not currently running +any L2s, the L0 has no state information about them. The only +exception to this is the location of the partition table, registered +via h_set_partition_table(). + +The v2 API changes this so that the L0 retains the L2 state even when +its vCPUs are no longer running. This means that the L1 only needs to +communicate with the L0 about L2 state when it needs to modify the L2 +state, or when its value is out of date. This provides an opportunity +for performance optimisation. + +When a vCPU exits from a H_GUEST_RUN_VCPU() call, the L1 internally +marks all L2 state as invalid. This means that if the L1 wants to know +the L2 state (say via a kvm_get_one_reg() call), it needs to call +H_GUEST_GET_STATE() to get that state. Once it's read, it's marked as +valid in L1 until the L2 is run again. + +Also, when an L1 modifies L2 vcpu state, it doesn't need to write it +to the L0 until that L2 vcpu runs again. Hence when the L1 updates +state (say via a kvm_set_one_reg() call), it writes to an internal L1 +copy and only flushes this copy to the L0 when the L2 runs again via +the H_GUEST_RUN_VCPU() input buffer. + +This lazy updating of state by the L1 avoids unnecessary +H_GUEST_{G|S}ET_STATE() calls. diff --git a/Documentation/arch/powerpc/mpc52xx.rst b/Documentation/arch/powerpc/mpc52xx.rst new file mode 100644 index 0000000000..5243b1763f --- /dev/null +++ b/Documentation/arch/powerpc/mpc52xx.rst @@ -0,0 +1,43 @@ +============================= +Linux 2.6.x on MPC52xx family +============================= + +For the latest info, go to https://www.246tNt.com/mpc52xx/ + +To compile/use : + + - U-Boot:: + + # tftpboot 200000 uImage + => tftpboot 400000 pRamdisk + => bootm 200000 400000 + + - DBug:: + + # dn -i zImage.initrd.lite5200 + + +Some remarks: + + - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100 + is not supported, and I'm not sure anyone is interested in working on it + so.
I didn't take 5xxx because there's apparently a lot of 5xxx that have + nothing to do with the MPC5200. I also included the 'MPC' for the same + reason. + - Of course, I inspired myself from the 2.4 port. If you think I forgot to + mention you/your company in the copyright of some code, I'll correct it + ASAP. diff --git a/Documentation/arch/powerpc/papr_hcalls.rst b/Documentation/arch/powerpc/papr_hcalls.rst new file mode 100644 index 0000000000..80d2c0aada --- /dev/null +++ b/Documentation/arch/powerpc/papr_hcalls.rst @@ -0,0 +1,302 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Hypercall Op-codes (hcalls) +=========================== + +Overview +========= + +Virtualization on 64-bit Power Book3S Platforms is based on the PAPR +specification [1]_ which describes the run-time environment for a guest +operating system and how it should interact with the hypervisor for +privileged operations. Currently there are two PAPR compliant hypervisors: + +- **IBM PowerVM (PHYP)**: IBM's proprietary hypervisor that supports AIX, + IBM-i and Linux as supported guests (termed as Logical Partitions + or LPARS). It supports the full PAPR specification. + +- **Qemu/KVM**: Supports PPC64 linux guests running on a PPC64 linux host. + Though it only implements a subset of PAPR specification called LoPAPR [2]_. + +On PPC64 arch a guest kernel running on top of a PAPR hypervisor is called +a *pSeries guest*. A pseries guest runs in a supervisor mode (HV=0) and must +issue hypercalls to the hypervisor whenever it needs to perform an action +that is hypervisor privileged [3]_ or for other services managed by the +hypervisor. + +Hence a Hypercall (hcall) is essentially a request by the pseries guest +asking hypervisor to perform a privileged operation on behalf of the guest. The +guest issues a hcall with the necessary input operands. The hypervisor after performing +the privileged operation returns a status code and output operands back to the +guest.
+ +HCALL ABI +========= +The ABI specification for a hcall between a pseries guest and PAPR hypervisor +is covered in section 14.5.3 of ref [2]_. Switch to the Hypervisor context is +done via the instruction **HVCS** that expects the Opcode for hcall to be set in *r3* +and any in-arguments for the hcall are provided in registers *r4-r12*. If values +have to be passed through a memory buffer, the data stored in that buffer should be +in Big-endian byte order. + +Once control returns back to the guest after hypervisor has serviced the +'HVCS' instruction the return value of the hcall is available in *r3* and any +out values are returned in registers *r4-r12*. Again like in case of in-arguments, +any out values stored in a memory buffer will be in Big-endian byte order. + +Powerpc arch code provides convenient wrappers named **plpar_hcall_xxx** defined +in an arch specific header [4]_ to issue hcalls from the linux kernel +running as pseries guest. + +Register Conventions +==================== + +Any hcall should follow the same register convention as described in section 2.2.1.1 +of "64-Bit ELF V2 ABI Specification: Power Architecture" [5]_.
Table below +summarizes these conventions: + ++----------+----------+-------------------------------------------+ +| Register |Volatile | Purpose | +| Range |(Y/N) | | ++==========+==========+===========================================+ +| r0 | Y | Optional-usage | ++----------+----------+-------------------------------------------+ +| r1 | N | Stack Pointer | ++----------+----------+-------------------------------------------+ +| r2 | N | TOC | ++----------+----------+-------------------------------------------+ +| r3 | Y | hcall opcode/return value | ++----------+----------+-------------------------------------------+ +| r4-r10 | Y | in and out values | ++----------+----------+-------------------------------------------+ +| r11 | Y | Optional-usage/Environmental pointer | ++----------+----------+-------------------------------------------+ +| r12 | Y | Optional-usage/Function entry address at | +| | | global entry point | ++----------+----------+-------------------------------------------+ +| r13 | N | Thread-Pointer | ++----------+----------+-------------------------------------------+ +| r14-r31 | N | Local Variables | ++----------+----------+-------------------------------------------+ +| LR | Y | Link Register | ++----------+----------+-------------------------------------------+ +| CTR | Y | Loop Counter | ++----------+----------+-------------------------------------------+ +| XER | Y | Fixed-point exception register. | ++----------+----------+-------------------------------------------+ +| CR0-1 | Y | Condition register fields. | ++----------+----------+-------------------------------------------+ +| CR2-4 | N | Condition register fields. | ++----------+----------+-------------------------------------------+ +| CR5-7 | Y | Condition register fields. 
| ++----------+----------+-------------------------------------------+ +| Others | N | | ++----------+----------+-------------------------------------------+ + +DRC & DRC Indexes +================= +:: + + DR1 Guest + +--+ +------------+ +---------+ + | | <----> | | | User | + +--+ DRC1 | | DRC | Space | + | PAPR | Index +---------+ + DR2 | Hypervisor | | | + +--+ | | <-----> | Kernel | + | | <----> | | Hcall | | + +--+ DRC2 +------------+ +---------+ + +PAPR hypervisor terms shared hardware resources like PCI devices, NVDIMMs etc +available for use by LPARs as Dynamic Resource (DR). When a DR is allocated to +an LPAR, PHYP creates a data-structure called Dynamic Resource Connector (DRC) +to manage LPAR access. An LPAR refers to a DRC via an opaque 32-bit number +called DRC-Index. The DRC-index value is provided to the LPAR via device-tree +where it is present as an attribute in the device tree node associated with the +DR. + +HCALL Return-values +=================== + +After servicing the hcall, hypervisor sets the return-value in *r3* indicating +success or failure of the hcall. In case of a failure an error code indicates +the cause for error. These codes are defined and documented in arch specific +header [4]_. + +In some cases a hcall can potentially take a long time and needs to be issued +multiple times in order to be completely serviced. These hcalls will usually +accept an opaque value *continue-token* within their argument list and a +return value of *H_CONTINUE* indicates that hypervisor hasn't finished +servicing the hcall yet. + +To make such hcalls the guest needs to set *continue-token == 0* for the +initial call and use the hypervisor returned value of *continue-token* +for each subsequent hcall until hypervisor returns a non *H_CONTINUE* +return value. + +HCALL Op-codes +============== + +Below is a partial list of HCALLs that are supported by PHYP.
For the +corresponding opcode values please look into the arch specific header [4]_: + +**H_SCM_READ_METADATA** + +| Input: *drcIndex, offset, buffer-address, numBytesToRead* +| Out: *numBytesRead* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_Hardware* + +Given a DRC Index of an NVDIMM, read N-bytes from the metadata area +associated with it, at a specified offset and copy it to provided buffer. +The metadata area stores configuration information such as label information, +bad-blocks etc. The metadata area is located out-of-band of NVDIMM storage +area hence a separate access semantics is provided. + +**H_SCM_WRITE_METADATA** + +| Input: *drcIndex, offset, data, numBytesToWrite* +| Out: *None* +| Return Value: *H_Success, H_Parameter, H_P2, H_P4, H_Hardware* + +Given a DRC Index of an NVDIMM, write N-bytes to the metadata area +associated with it, at the specified offset and from the provided buffer. + +**H_SCM_BIND_MEM** + +| Input: *drcIndex, startingScmBlockIndex, numScmBlocksToBind,* +| *targetLogicalMemoryAddress, continue-token* +| Out: *continue-token, targetLogicalMemoryAddress, numScmBlocksToBound* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_P4, H_Overlap,* +| *H_Too_Big, H_P5, H_Busy* + +Given a DRC-Index of an NVDIMM, map a continuous SCM blocks range +*(startingScmBlockIndex, startingScmBlockIndex+numScmBlocksToBind)* to the guest +at *targetLogicalMemoryAddress* within guest physical address space. In +case *targetLogicalMemoryAddress == 0xFFFFFFFF_FFFFFFFF* then hypervisor +assigns a target address to the guest. The HCALL can fail if the Guest has +an active PTE entry to the SCM block being bound. 
+ +**H_SCM_UNBIND_MEM** +| Input: drcIndex, startingScmLogicalMemoryAddress, numScmBlocksToUnbind +| Out: numScmBlocksUnbound +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Overlap,* +| *H_Busy, H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* + +Given a DRC-Index of an NVDimm, unmap *numScmBlocksToUnbind* SCM blocks starting +at *startingScmLogicalMemoryAddress* from guest physical address space. The +HCALL can fail if the Guest has an active PTE entry to the SCM block being +unbound. + +**H_SCM_QUERY_BLOCK_MEM_BINDING** + +| Input: *drcIndex, scmBlockIndex* +| Out: *Guest-Physical-Address* +| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* + +Given a DRC-Index and an SCM Block index return the guest physical address to +which the SCM block is mapped to. + +**H_SCM_QUERY_LOGICAL_MEM_BINDING** + +| Input: *Guest-Physical-Address* +| Out: *drcIndex, scmBlockIndex* +| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* + +Given a guest physical address return which DRC Index and SCM block is mapped +to that address. + +**H_SCM_UNBIND_ALL** + +| Input: *scmTargetScope, drcIndex* +| Out: *None* +| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Busy,* +| *H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* + +Depending on the Target scope unmap all SCM blocks belonging to all NVDIMMs +or all SCM blocks belonging to a single NVDIMM identified by its drcIndex +from the LPAR memory. + +**H_SCM_HEALTH** + +| Input: drcIndex +| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)* +| Return Value: *H_Success, H_Parameter, H_Hardware* + +Given a DRC Index return the info on predictive failure and overall health of +the PMEM device. The asserted bits in the health-bitmap indicate one or more states +(described in table below) of the PMEM device and health-bit-valid-bitmap indicate +which bits in health-bitmap are valid. 
The bits are reported in +reverse bit ordering for example a value of 0xC400000000000000 +indicates bits 0, 1, and 5 are valid. + +Health Bitmap Flags: + ++------+-----------------------------------------------------------------------+ +| Bit | Definition | ++======+=======================================================================+ +| 00 | PMEM device is unable to persist memory contents. | +| | If the system is powered down, nothing will be saved. | ++------+-----------------------------------------------------------------------+ +| 01 | PMEM device failed to persist memory contents. Either contents were | +| | not saved successfully on power down or were not restored properly on | +| | power up. | ++------+-----------------------------------------------------------------------+ +| 02 | PMEM device contents are persisted from previous IPL. The data from | +| | the last boot were successfully restored. | ++------+-----------------------------------------------------------------------+ +| 03 | PMEM device contents are not persisted from previous IPL. There was no| +| | data to restore from the last boot. | ++------+-----------------------------------------------------------------------+ +| 04 | PMEM device memory life remaining is critically low | ++------+-----------------------------------------------------------------------+ +| 05 | PMEM device will be garded off next IPL due to failure | ++------+-----------------------------------------------------------------------+ +| 06 | PMEM device contents cannot persist due to current platform health | +| | status. A hardware failure may prevent data from being saved or | +| | restored. 
| ++------+-----------------------------------------------------------------------+ +| 07 | PMEM device is unable to persist memory contents in certain conditions| ++------+-----------------------------------------------------------------------+ +| 08 | PMEM device is encrypted | ++------+-----------------------------------------------------------------------+ +| 09 | PMEM device has successfully completed a requested erase or secure | +| | erase procedure. | ++------+-----------------------------------------------------------------------+ +|10:63 | Reserved / Unused | ++------+-----------------------------------------------------------------------+ + +**H_SCM_PERFORMANCE_STATS** + +| Input: drcIndex, resultBuffer Addr +| Out: None +| Return Value: *H_Success, H_Parameter, H_Unsupported, H_Hardware, H_Authority, H_Privilege* + +Given a DRC Index collect the performance statistics for NVDIMM and copy them +to the resultBuffer. + +**H_SCM_FLUSH** + +| Input: *drcIndex, continue-token* +| Out: *continue-token* +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY* + +Given a DRC Index Flush the data to backend NVDIMM device. + +The hcall returns H_BUSY when the flush takes longer time and the hcall needs +to be issued multiple times in order to be completely serviced. The +*continue-token* from the output to be passed in the argument list of +subsequent hcalls to the hypervisor until the hcall is completely serviced +at which point H_SUCCESS or other error is returned by the hypervisor. + +References +========== +.. [1] "Power Architecture Platform Reference" + https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference +.. [2] "Linux on Power Architecture Platform Reference" + https://members.openpowerfoundation.org/document/dl/469 +.. [3] "Definitions and Notation" Book III-Section 14.5.3 + https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 +.. [4] arch/powerpc/include/asm/hvcall.h +.. 
[5] "64-Bit ELF V2 ABI Specification: Power Architecture" + https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture diff --git a/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst b/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst new file mode 100644 index 0000000000..f5a5793e16 --- /dev/null +++ b/Documentation/arch/powerpc/pci_iov_resource_on_powernv.rst @@ -0,0 +1,312 @@ +=================================================== +PCI Express I/O Virtualization Resource on PowerNV +=================================================== + +Wei Yang + +Benjamin Herrenschmidt + +Bjorn Helgaas + +26 Aug 2014 + +This document describes the requirement from hardware for PCI MMIO resource +sizing and assignment on PowerKVM and how generic PCI code handles this +requirement. The first two sections describe the concepts of Partitionable +Endpoints and the implementation on P8 (IODA2). The next two sections talk +about considerations on enabling SRIOV on IODA2. + +1. Introduction to Partitionable Endpoints +========================================== + +A Partitionable Endpoint (PE) is a way to group the various resources +associated with a device or a set of devices to provide isolation between +partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism +to freeze a device that is causing errors in order to limit the possibility +of propagation of bad data. + +There is thus, in HW, a table of PE states that contains a pair of "frozen" +state bits (one for MMIO and one for DMA, they get set together but can be +cleared independently) for each PE. + +When a PE is frozen, all stores in any direction are dropped and all loads +return all 1's value. MSIs are also blocked. There's a bit more state that +captures things like the details of the error that caused the freeze etc., but +that's not critical. + +The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs. + +The following section provides a rough description of what we have on P8 +(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB +is a completely separate HW entity that replicates the entire logic, so has +its own set of PEs, etc. + +2. Implementation of Partitionable Endpoints on P8 (IODA2) +========================================================== + +P8 supports up to 256 Partitionable Endpoints per PHB. + + * Inbound + + For DMA, MSIs and inbound PCIe error messages, we have a table (in + memory but accessed in HW by the chip) that provides a direct + correspondence between a PCIe RID (bus/dev/fn) with a PE number. + We call this the RTT. + + - For DMA we then provide an entire address space for each PE that can + contain two "windows", depending on the value of PCI address bit 59. + Each window can be configured to be remapped via a "TCE table" (IOMMU + translation table), which has various configurable characteristics + not described here. + + - For MSIs, we have two windows in the address space (one at the top of + the 32-bit space and one much higher) which, via a combination of the + address and MSI value, will result in one of the 2048 interrupts per + bridge being triggered. There's a PE# in the interrupt controller + descriptor table as well which is compared with the PE# obtained from + the RTT to "authorize" the device to emit that specific interrupt. + + - Error messages just use the RTT. + + * Outbound. That's where the tricky part is. + + Like other PCI host bridges, the Power8 IODA2 PHB supports "windows" + from the CPU address space to the PCI address space. There is one M32 + window and sixteen M64 windows. They have different characteristics. + First what they have in common: they forward a configurable portion of + the CPU address space to the PCIe bus and must be naturally aligned + power of two in size. 
The rest is different: + + - The M32 window: + + * Is limited to 4GB in size. + + * Drops the top bits of the address (above the size) and replaces + them with a configurable value. This is typically used to generate + 32-bit PCIe accesses. We configure that window at boot from FW and + don't touch it from Linux; it's usually set to forward a 2GB + portion of address space from the CPU to PCIe + 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually + reserved for MSIs but this is not a problem at this point; we just + need to ensure Linux doesn't assign anything there, the M32 logic + ignores that however and will forward in that space if we try). + + * It is divided into 256 segments of equal size. A table in the chip + maps each segment to a PE#. That allows portions of the MMIO space + to be assigned to PEs on a segment granularity. For a 2GB window, + the segment granularity is 2GB/256 = 8MB. + + Now, this is the "main" window we use in Linux today (excluding + SR-IOV). We basically use the trick of forcing the bridge MMIO windows + onto a segment alignment/granularity so that the space behind a bridge + can be assigned to a PE. + + Ideally we would like to be able to have individual functions in PEs + but that would mean using a completely different address allocation + scheme where individual function BARs can be "grouped" to fit in one or + more segments. + + - The M64 windows: + + * Must be at least 256MB in size. + + * Do not translate addresses (the address on PCIe is the same as the + address on the PowerBus). There is a way to also set the top 14 + bits which are not conveyed by PowerBus but we don't use this. + + * Can be configured to be segmented. When not segmented, we can + specify the PE# for the entire window. When segmented, a window + has 256 segments; however, there is no table for mapping a segment + to a PE#. The segment number *is* the PE#. + + * Support overlaps. 
If an address is covered by multiple windows, + there's a defined ordering for which window applies. + + We have code (fairly new compared to the M32 stuff) that exploits that + for large BARs in 64-bit space: + + We configure an M64 window to cover the entire region of address space + that has been assigned by FW for the PHB (about 64GB, ignore the space + for the M32, it comes out of a different "reserve"). We configure it + as segmented. + + Then we do the same thing as with M32, using the bridge alignment + trick, to match to those giant segments. + + Since we cannot remap, we have two additional constraints: + + - We do the PE# allocation *after* the 64-bit space has been assigned + because the addresses we use directly determine the PE#. We then + update the M32 PE# for the devices that use both 32-bit and 64-bit + spaces or assign the remaining PE# to 32-bit only devices. + + - We cannot "group" segments in HW, so if a device ends up using more + than one segment, we end up with more than one PE#. There is a HW + mechanism to make the freeze state cascade to "companion" PEs but + that only works for PCIe error messages (typically used so that if + you freeze a switch, it freezes all its children). So we do it in + SW. We lose a bit of effectiveness of EEH in that case, but that's + the best we found. So when any of the PEs freezes, we freeze the + other ones for that "domain". We thus introduce the concept of + "master PE" which is the one used for DMA, MSIs, etc., and "secondary + PEs" that are used for the remaining M64 segments. + + We would like to investigate using additional M64 windows in "single + PE" mode to overlay over specific BARs to work around some of that, for + example for devices with very large BARs, e.g., GPUs. It would make + sense, but we haven't done it yet. + +3. 
Considerations for SR-IOV on PowerKVM +======================================== + + * SR-IOV Background + + The PCIe SR-IOV feature allows a single Physical Function (PF) to + support several Virtual Functions (VFs). Registers in the PF's SR-IOV + Capability control the number of VFs and whether they are enabled. + + When VFs are enabled, they appear in Configuration Space like normal + PCI devices, but the BARs in VF config space headers are unusual. For + a non-VF device, software uses BARs in the config space header to + discover the BAR sizes and assign addresses for them. For VF devices, + software uses VF BAR registers in the *PF* SR-IOV Capability to + discover sizes and assign addresses. The BARs in the VF's config space + header are read-only zeros. + + When a VF BAR in the PF SR-IOV Capability is programmed, it sets the + base address for all the corresponding VF(n) BARs. For example, if the + PF SR-IOV Capability is programmed to enable eight VFs, and it has a + 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region. + This region is divided into eight contiguous 1MB regions, each of which + is a BAR0 for one of the VFs. Note that even though the VF BAR + describes an 8MB region, the alignment requirement is for a single VF, + i.e., 1MB in this example. + + There are several strategies for isolating VFs in PEs: + + - M32 window: There's one M32 window, and it is split into 256 + equally-sized segments. The finest granularity possible is a 256MB + window with 1MB segments. VF BARs that are 1MB or larger could be + mapped to separate PEs in this window. Each segment can be + individually mapped to a PE via the lookup table, so this is quite + flexible, but it works best when all the VF BARs are the same size. If + they are different sizes, the entire window has to be small enough that + the segment size matches the smallest VF BAR, which means larger VF + BARs span several segments. 
+ + - Non-segmented M64 window: A non-segmented M64 window is mapped entirely + to a single PE, so it could only isolate one VF. + + - Single segmented M64 windows: A segmented M64 window could be used just + like the M32 window, but the segments can't be individually mapped to + PEs (the segment number is the PE#), so there isn't as much + flexibility. A VF with multiple BARs would have to be in a "domain" of + multiple PEs, which is not as well isolated as a single PE. + + - Multiple segmented M64 windows: As usual, each window is split into 256 + equally-sized segments, and the segment number is the PE#. But if we + use several M64 windows, they can be set to different base addresses + and different segment sizes. If we have VFs that each have a 1MB BAR + and a 32MB BAR, we could use one M64 window to assign 1MB segments and + another M64 window to assign 32MB segments. + + Finally, the plan to use M64 windows for SR-IOV, which will be described + more in the next two sections. For a given VF BAR, we need to + effectively reserve the entire 256 segments (256 * VF BAR size) and + position the VF BAR to start at the beginning of a free range of + segments/PEs inside that M64 window. + + The goal is of course to be able to give a separate PE for each VF. + + The IODA2 platform has 16 M64 windows, which are used to map MMIO + range to PE#. Each M64 window defines one MMIO range and this range is + divided into 256 segments, with each segment corresponding to one PE. + + We decide to leverage this M64 window to map VFs to individual PEs, since + SR-IOV VF BARs are all the same size. + + But doing so introduces another problem: total_VFs is usually smaller + than the number of M64 window segments, so if we map one VF BAR directly + to one M64 window, some part of the M64 window will map to another + device's MMIO range. 
+ + IODA supports 256 PEs, so segmented windows contain 256 segments, so if + total_VFs is less than 256, we have the situation in Figure 1.0, where + segments [total_VFs, 255] of the M64 window may map to some MMIO range on + other devices:: + + 0 1 total_VFs - 1 + +------+------+- -+------+------+ + | | | ... | | | + +------+------+- -+------+------+ + + VF(n) BAR space + + 0 1 total_VFs - 1 255 + +------+------+- -+------+------+- -+------+------+ + | | | ... | | | ... | | | + +------+------+- -+------+------+- -+------+------+ + + M64 window + + Figure 1.0 Direct map VF(n) BAR space + + Our current solution is to allocate 256 segments even if the VF(n) BAR + space doesn't need that much, as shown in Figure 1.1:: + + 0 1 total_VFs - 1 255 + +------+------+- -+------+------+- -+------+------+ + | | | ... | | | ... | | | + +------+------+- -+------+------+- -+------+------+ + + VF(n) BAR space + extra + + 0 1 total_VFs - 1 255 + +------+------+- -+------+------+- -+------+------+ + | | | ... | | | ... | | | + +------+------+- -+------+------+- -+------+------+ + + M64 window + + Figure 1.1 Map VF(n) BAR space + extra + + Allocating the extra space ensures that the entire M64 window will be + assigned to this one SR-IOV device and none of the space will be + available for other devices. Note that this only expands the space + reserved in software; there are still only total_VFs VFs, and they only + respond to segments [0, total_VFs - 1]. There's nothing in hardware that + responds to segments [total_VFs, 255]. + +4. Implications for the Generic PCI Code +======================================== + +The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be +aligned to the size of an individual VF BAR. + +In IODA2, the MMIO address determines the PE#. If the address is in an M32 +window, we can set the PE# by updating the table that translates segments +to PE#s. Similarly, if the address is in an unsegmented M64 window, we can +set the PE# for the window. 
But if it's in a segmented M64 window, the +segment number is the PE#. + +Therefore, the only way to control the PE# for a VF is to change the base +of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact +amount of space required for the VF(n) BAR space, the VF BAR value is fixed +and cannot be changed. + +On the other hand, if the PCI core allocates additional space, the VF BAR +value can be changed as long as the entire VF(n) BAR space remains inside +the space allocated by the core. + +Ideally the segment size will be the same as an individual VF BAR size. +Then each VF will be in its own PE. The VF BARs (and therefore the PE#s) +are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we +allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0. + +If the segment size is smaller than the VF BAR size, it will take several +segments to cover a VF BAR, and a VF will be in several PEs. This is +possible, but the isolation isn't as good, and it reduces the number of PE# +choices because instead of consuming only numVFs segments, the VF(n) BAR +space will consume (numVFs * n) segments. That means there aren't as many +available segments for adjusting base of the VF(n) BAR space. diff --git a/Documentation/arch/powerpc/pmu-ebb.rst b/Documentation/arch/powerpc/pmu-ebb.rst new file mode 100644 index 0000000000..4f474758eb --- /dev/null +++ b/Documentation/arch/powerpc/pmu-ebb.rst @@ -0,0 +1,138 @@ +======================== +PMU Event Based Branches +======================== + +Event Based Branches (EBBs) are a feature which allows the hardware to +branch directly to a specified user space address when certain events occur. + +The full specification is available in Power ISA v2.07: + + https://www.power.org/documentation/power-isa-version-2-07/ + +One type of event for which EBBs can be configured is PMU exceptions. 
This +document describes the API for configuring the Power PMU to generate EBBs, +using the Linux perf_events API. + + +Terminology +----------- + +Throughout this document we will refer to an "EBB event" or "EBB events". This +just refers to a struct perf_event which has set the "EBB" flag in its +attr.config. All events which can be configured on the hardware PMU are +possible "EBB events". + + +Background +---------- + +When a PMU EBB occurs it is delivered to the currently running process. As such +EBBs can only sensibly be used by programs for self-monitoring. + +It is a feature of the perf_events API that events can be created on other +processes, subject to standard permission checks. This is also true of EBB +events, however unless the target process enables EBBs (via mtspr(BESCR)) no +EBBs will ever be delivered. + +This makes it possible for a process to enable EBBs for itself, but not +actually configure any events. At a later time another process can come along +and attach an EBB event to the process, which will then cause EBBs to be +delivered to the first process. It's not clear if this is actually useful. + + +When the PMU is configured for EBBs, all PMU interrupts are delivered to the +user process. This means once an EBB event is scheduled on the PMU, no non-EBB +events can be configured. This means that EBB events can not be run +concurrently with regular 'perf' commands, or any other perf events. + +It is however safe to run 'perf' commands on a process which is using EBBs. The +kernel will in general schedule the EBB event, and perf will be notified that +its events could not run. + +The exclusion between EBB events and regular events is implemented using the +existing "pinned" and "exclusive" attributes of perf_events. This means EBB +events will be given priority over other events, unless they are also pinned. 
+If an EBB event and a regular event are both pinned, then whichever is enabled +first will be scheduled and the other will be put in error state. See the +section below titled "Enabling an EBB event" for more information. + + +Creating an EBB event +--------------------- + +To request that an event is counted using EBB, the event code should have bit +63 set. + +EBB events must be created with a particular, and restrictive, set of +attributes - this is so that they interoperate correctly with the rest of the +perf_events subsystem. + +An EBB event must be created with the "pinned" and "exclusive" attributes set. +Note that if you are creating a group of EBB events, only the leader can have +these attributes set. + +An EBB event must NOT set any of the "inherit", "sample_period", "freq" or +"enable_on_exec" attributes. + +An EBB event must be attached to a task. This is specified to perf_event_open() +by passing a pid value, typically 0 indicating the current task. + +All events in a group must agree on whether they want EBB. That is all events +must request EBB, or none may request EBB. + +EBB events must specify the PMC they are to be counted on. This ensures +userspace is able to reliably determine which PMC the event is scheduled on. + + +Enabling an EBB event +--------------------- + +Once an EBB event has been successfully opened, it must be enabled with the +perf_events API. This can be achieved either via the ioctl() interface, or the +prctl() interface. + +However, due to the design of the perf_events API, enabling an event does not +guarantee that it has been scheduled on the PMU. To ensure that the EBB event +has been scheduled on the PMU, you must perform a read() on the event. If the +read() returns EOF, then the event has not been scheduled and EBBs are not +enabled. + +This behaviour occurs because the EBB event is pinned and exclusive. When the +EBB event is enabled it will force all other non-pinned events off the PMU. 
In +this case the enable will be successful. However if there is already an event +pinned on the PMU then the enable will not be successful. + + +Reading an EBB event +-------------------- + +It is possible to read() from an EBB event. However the results are +meaningless. Because interrupts are being delivered to the user process the +kernel is not able to count the event, and so will return a junk value. + + +Closing an EBB event +-------------------- + +When an EBB event is finished with, you can close it using close() as for any +regular event. If this is the last EBB event the PMU will be deconfigured and +no further PMU EBBs will be delivered. + + +EBB Handler +----------- + +The EBB handler is just regular userspace code, however it must be written in +the style of an interrupt handler. When the handler is entered all registers +are live (possibly) and so must be saved somehow before the handler can invoke +other code. + +It's up to the program how to handle this. For C programs a relatively simple +option is to create an interrupt frame on the stack and save registers there. + +Fork +---- + +EBB events are not inherited across fork. If the child process wishes to use +EBBs it should open a new event for itself. Similarly the EBB state in +BESCR/EBBHR/EBBRR is cleared across fork(). diff --git a/Documentation/arch/powerpc/ptrace.rst b/Documentation/arch/powerpc/ptrace.rst new file mode 100644 index 0000000000..5629edf4d5 --- /dev/null +++ b/Documentation/arch/powerpc/ptrace.rst @@ -0,0 +1,157 @@ +====== +Ptrace +====== + +GDB intends to support the following hardware debug features of BookE +processors: + +4 hardware breakpoints (IAC) +2 hardware watchpoints (read, write and read-write) (DAC) +2 value conditions for the hardware watchpoints (DVC) + +For that, we need to extend ptrace so that GDB can query and set these +resources. 
Since we're extending, we're trying to create an interface +that's extendable and that covers both BookE and server processors, so +that GDB doesn't need to special-case each of them. We added the +following 3 new ptrace requests. + +1. PPC_PTRACE_GETHWDBGINFO +============================ + +Query for GDB to discover the hardware debug features. The main info to +be returned here is the minimum alignment for the hardware watchpoints. +BookE processors don't have restrictions here, but server processors have +an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid +adding special cases to GDB based on what it sees in AUXV. + +Since we're at it, we added other useful info that the kernel can return to +GDB: this query will return the number of hardware breakpoints, hardware +watchpoints and whether it supports a range of addresses and a condition. +The query will fill the following structure provided by the requesting process:: + + struct ppc_debug_info { + uint32_t version; + uint32_t num_instruction_bps; + uint32_t num_data_bps; + uint32_t num_condition_regs; + uint32_t data_bp_alignment; + uint32_t sizeof_condition; /* size of the DVC register */ + uint64_t features; /* bitmask of the individual flags */ + }; + +features will have bits indicating whether there is support for:: + + #define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1 + #define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2 + #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4 + #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8 + #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 + #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 + +2.
PPC_PTRACE_SETHWDEBUG + +Sets a hardware breakpoint or watchpoint, according to the provided structure:: + + struct ppc_hw_breakpoint { + uint32_t version; + #define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1 + #define PPC_BREAKPOINT_TRIGGER_READ 0x2 + #define PPC_BREAKPOINT_TRIGGER_WRITE 0x4 + uint32_t trigger_type; /* only some combinations allowed */ + #define PPC_BREAKPOINT_MODE_EXACT 0x0 + #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1 + #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2 + #define PPC_BREAKPOINT_MODE_MASK 0x3 + uint32_t addr_mode; /* address match mode */ + + #define PPC_BREAKPOINT_CONDITION_MODE 0x3 + #define PPC_BREAKPOINT_CONDITION_NONE 0x0 + #define PPC_BREAKPOINT_CONDITION_AND 0x1 + #define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */ + #define PPC_BREAKPOINT_CONDITION_OR 0x2 + #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3 + #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */ + #define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16)) + uint32_t condition_mode; /* break/watchpoint condition flags */ + + uint64_t addr; + uint64_t addr2; + uint64_t condition_value; + }; + +A request specifies one event, not necessarily just one register to be set. +For instance, if the request is for a watchpoint with a condition, both the +DAC and DVC registers will be set in the same request. + +With this GDB can ask for all kinds of hardware breakpoints and watchpoints +that the BookE supports. COMEFROM breakpoints available in server processors +are not contemplated, but that is out of the scope of this work. + +ptrace will return an integer (handle) uniquely identifying the breakpoint or +watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG +request to ask for its removal. Return -ENOSPC if the requested breakpoint +can't be allocated on the registers. 
+ +Some examples of using the structure to: + +- set a breakpoint in the first breakpoint register:: + + p.version = PPC_DEBUG_CURRENT_VERSION; + p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE; + p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + p.addr = (uint64_t) address; + p.addr2 = 0; + p.condition_value = 0; + +- set a watchpoint which triggers on reads in the second watchpoint register:: + + p.version = PPC_DEBUG_CURRENT_VERSION; + p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ; + p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + p.addr = (uint64_t) address; + p.addr2 = 0; + p.condition_value = 0; + +- set a watchpoint which triggers only with a specific value:: + + p.version = PPC_DEBUG_CURRENT_VERSION; + p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ; + p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL; + p.addr = (uint64_t) address; + p.addr2 = 0; + p.condition_value = (uint64_t) condition; + +- set a ranged hardware breakpoint:: + + p.version = PPC_DEBUG_CURRENT_VERSION; + p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE; + p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + p.addr = (uint64_t) begin_range; + p.addr2 = (uint64_t) end_range; + p.condition_value = 0; + +- set a watchpoint in server processors (BookS):: + + p.version = 1; + p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; + p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + or + p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; + + p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + p.addr = (uint64_t) begin_range; + /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where + * addr2 - addr <= 8 Bytes. + */ + p.addr2 = (uint64_t) end_range; + p.condition_value = 0; + +3. 
PPC_PTRACE_DELHWDEBUG + +Takes an integer which identifies an existing breakpoint or watchpoint +(i.e., the value returned from PPC_PTRACE_SETHWDEBUG), and deletes the +corresponding breakpoint or watchpoint. diff --git a/Documentation/arch/powerpc/qe_firmware.rst b/Documentation/arch/powerpc/qe_firmware.rst new file mode 100644 index 0000000000..a358f152b7 --- /dev/null +++ b/Documentation/arch/powerpc/qe_firmware.rst @@ -0,0 +1,296 @@ +========================================= +Freescale QUICC Engine Firmware Uploading +========================================= + +(c) 2007 Timur Tabi , + Freescale Semiconductor + +.. Table of Contents + + I - Software License for Firmware + + II - Microcode Availability + + III - Description and Terminology + + IV - Microcode Programming Details + + V - Firmware Structure Layout + + VI - Sample Code for Creating Firmware Files + +Revision Information +==================== + +November 30, 2007: Rev 1.0 - Initial version + +I - Software License for Firmware +================================= + +Each firmware file comes with its own software license. For information on +the particular license, please see the license text that is distributed with +the firmware. + +II - Microcode Availability +=========================== + +Firmware files are distributed through various channels. Some are available on +http://opensource.freescale.com. For other firmware files, please contact +your Freescale representative or your operating system vendor. + +III - Description and Terminology +================================= + +In this document, the term 'microcode' refers to the sequence of 32-bit +integers that compose the actual QE microcode.
+ +The term 'firmware' refers to a binary blob that contains the microcode as +well as other data that + + 1) describes the microcode's purpose + 2) describes how and where to upload the microcode + 3) specifies the values of various registers + 4) includes additional data for use by specific device drivers + +Firmware files are binary files that contain only a firmware. + +IV - Microcode Programming Details +=================================== + +The QE architecture allows for only one microcode present in I-RAM for each +RISC processor. To replace any current microcode, a full QE reset (which +disables the microcode) must be performed first. + +QE microcode is uploaded using the following procedure: + +1) The microcode is placed into I-RAM at a specific location, using the + IRAM.IADD and IRAM.IDATA registers. + +2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware + needs split I-RAM. Split I-RAM is only meaningful for SOCs that have + QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM + allows each processor to run a different microcode, effectively creating an + asymmetric multiprocessing (AMP) system. + +3) The TIBCR trap registers are loaded with the addresses of the trap handlers + in the microcode. + +4) The RSP.ECCR register is programmed with the value provided. + +5) If necessary, device drivers that need the virtual traps and extended mode + data will use them. + +Virtual Microcode Traps + +These virtual traps are conditional branches in the microcode. These are +"soft" provisions introduced in the ROMcode in order to enable higher +flexibility and save h/w traps. If new features are activated or an issue is +being fixed in the RAM package utilizing them, they should be activated. This data +structure signals the microcode which of these virtual traps is active. + +This structure contains 6 words that the application should copy to some +specific locations that have been defined.
This table describes the structure:: + + --------------------------------------------------------------- + | Offset in | | Destination Offset | Size of | + | array | Protocol | within PRAM | Operand | + --------------------------------------------------------------| + | 0 | Ethernet | 0xF8 | 4 bytes | + | | interworking | | | + --------------------------------------------------------------- + | 4 | ATM | 0xF8 | 4 bytes | + | | interworking | | | + --------------------------------------------------------------- + | 8 | PPP | 0xF8 | 4 bytes | + | | interworking | | | + --------------------------------------------------------------- + | 12 | Ethernet RX | 0x22 | 1 byte | + | | Distributor Page | | | + --------------------------------------------------------------- + | 16 | ATM Globtal | 0x28 | 1 byte | + | | Params Table | | | + --------------------------------------------------------------- + | 20 | Insert Frame | 0xF8 | 4 bytes | + --------------------------------------------------------------- + + +Extended Modes + +This is a double word bit array (64 bits) that defines special functionality +which has an impact on the software drivers. Each bit has its own impact +and has special instructions for the s/w associated with it. This structure is +described in this table:: + + ----------------------------------------------------------------------- + | Bit # | Name | Description | + ----------------------------------------------------------------------- + | 0 | General | Indicates that prior to each host command | + | | push command | given by the application, the software must | + | | | assert a special host command (push command)| + | | | CECDR = 0x00800000. | + | | | CECR = 0x01c1000f. 
| + ----------------------------------------------------------------------- + | 1 | UCC ATM | Indicates that after issuing ATM RX INIT | + | | RX INIT | command, the host must issue another special| + | | push command | command (push command) and immediately | + | | | following that re-issue the ATM RX INIT | + | | | command. (This makes the sequence of | + | | | initializing the ATM receiver a sequence of | + | | | three host commands) | + | | | CECDR = 0x00800000. | + | | | CECR = 0x01c1000f. | + ----------------------------------------------------------------------- + | 2 | Add/remove | Indicates that following the specific host | + | | command | command: "Add/Remove entry in Hash Lookup | + | | validation | Table" used in Interworking setup, the user | + | | | must issue another command. | + | | | CECDR = 0xce000003. | + | | | CECR = 0x01c10f58. | + ----------------------------------------------------------------------- + | 3 | General push | Indicates that the s/w has to initialize | + | | command | some pointers in the Ethernet thread pages | + | | | which are used when Header Compression is | + | | | activated. The full details of these | + | | | pointers is located in the software drivers.| + ----------------------------------------------------------------------- + | 4 | General push | Indicates that after issuing Ethernet TX | + | | command | INIT command, user must issue this command | + | | | for each SNUM of Ethernet TX thread. | + | | | CECDR = 0x00800003. | + | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, | + | | | 1'b{1}, 12'b{0}, 4'b{1} | + ----------------------------------------------------------------------- + | 5 - 31 | N/A | Reserved, set to zero. | + ----------------------------------------------------------------------- + +V - Firmware Structure Layout +============================== + +QE microcode from Freescale is typically provided as a header file. 
This +header file contains macros that define the microcode binary itself as well as +some other data used in uploading that microcode. The format of these files +does not lend itself to simple inclusion into other code. Hence, +the need for a more portable format. This section defines that format. + +Instead of distributing a header file, the microcode and related data are +embedded into a binary blob. This blob is passed to the qe_upload_firmware() +function, which parses the blob and performs everything necessary to upload +the microcode. + +All integers are big-endian. See the comments for function +qe_upload_firmware() for up-to-date implementation information. + +This structure supports versioning, where the version of the structure is +embedded into the structure itself. To ensure forward and backwards +compatibility, all versions of the structure must use the same 'qe_header' +structure at the beginning. + +'header' (type: struct qe_header): + The 'length' field is the size, in bytes, of the entire structure, + including all the microcode embedded in it, as well as the CRC (if + present). + + The 'magic' field is an array of three bytes that contains the letters + 'Q', 'E', and 'F'. This is an identifier that indicates that this + structure is a QE Firmware structure. + + The 'version' field is a single byte that indicates the version of this + structure. If the layout of the structure should ever need to be + changed to add support for additional types of microcode, then the + version number should also be changed. + +The 'id' field is a null-terminated string (suitable for printing) that +identifies the firmware. + +The 'count' field indicates the number of 'microcode' structures. There +must be one and only one 'microcode' structure for each RISC processor. +Therefore, this field also represents the number of RISC processors for this +SOC. + +The 'soc' structure contains the SOC numbers and revisions used to match +the microcode to the SOC itself. 
Normally, the microcode loader should +check the data in this structure with the SOC number and revisions, and +only upload the microcode if there's a match. However, this check is not +made on all platforms. + +Although it is not recommended, you can specify '0' in the soc.model +field to skip matching SOCs altogether. + +The 'model' field is a 16-bit number that matches the actual SOC. The +'major' and 'minor' fields are the major and minor revision numbers, +respectively, of the SOC. + +For example, to match the 8323, revision 1.0:: + + soc.model = 8323 + soc.major = 1 + soc.minor = 0 + +'padding' is necessary for structure alignment. This field ensures that the +'extended_modes' field is aligned on a 64-bit boundary. + +'extended_modes' is a bitfield that defines special functionality which has an +impact on the device drivers. Each bit has its own impact and has special +instructions for the driver associated with it. This field is stored in +the QE library and available to any driver that calls qe_get_firmware_info(). + +'vtraps' is an array of 8 words that contain virtual trap values for each +virtual trap. As with 'extended_modes', this field is stored in the QE +library and available to any driver that calls qe_get_firmware_info(). + +'microcode' (type: struct qe_microcode): + For each RISC processor there is one 'microcode' structure. The first + 'microcode' structure is for the first RISC, and so on. + + The 'id' field is a null-terminated string suitable for printing that + identifies this particular microcode. + + 'traps' is an array of 16 words that contain hardware trap values + for each of the 16 traps. If trap[i] is 0, then this particular + trap is to be ignored (i.e. not written to TIBCR[i]). The entire value + is written as-is to the TIBCR[i] register, so be sure to set the EN + and T_IBP bits if necessary. + + 'eccr' is the value to program into the ECCR register. + + 'iram_offset' is the offset into IRAM to start writing the + microcode. 
+ + 'count' is the number of 32-bit words in the microcode. + + 'code_offset' is the offset, in bytes, from the beginning of this + structure where the microcode itself can be found. The first + microcode binary should be located immediately after the 'microcode' + array. + + 'major', 'minor', and 'revision' are the major, minor, and revision + version numbers, respectively, of the microcode. If all values are 0, + then these fields are ignored. + + 'reserved' is necessary for structure alignment. Since 'microcode' + is an array, the 64-bit 'extended_modes' field needs to be aligned + on a 64-bit boundary, and this can only happen if the size of + 'microcode' is a multiple of 8 bytes. To ensure that, we add + 'reserved'. + +After the last microcode is a 32-bit CRC. It can be calculated using +this algorithm:: + + u32 crc32(const u8 *p, unsigned int len) + { + unsigned int i; + u32 crc = 0; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + return crc; + } + +VI - Sample Code for Creating Firmware Files +============================================ + +A Python program that creates firmware binaries from the header files normally +distributed by Freescale can be found on http://opensource.freescale.com. diff --git a/Documentation/arch/powerpc/syscall64-abi.rst b/Documentation/arch/powerpc/syscall64-abi.rst new file mode 100644 index 0000000000..56490c4c0c --- /dev/null +++ b/Documentation/arch/powerpc/syscall64-abi.rst @@ -0,0 +1,153 @@ +=============================================== +Power Architecture 64-bit Linux system call ABI +=============================================== + +syscall +======= + +Invocation +---------- +The syscall is made with the sc instruction, and returns with execution +continuing at the instruction following the sc instruction. 
+ +If PPC_FEATURE2_SCV appears in the AT_HWCAP2 ELF auxiliary vector, the +scv 0 instruction is an alternative that may provide better performance, +with some differences to calling sequence. + +syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI +specification C function calling sequence, including register preservation +rules, with the following differences. + +.. [1] Some syscalls (typically low-level management functions) may have + different calling sequences (e.g., rt_sigreturn). + +Parameters +---------- +The system call number is specified in r0. + +There is a maximum of 6 integer parameters to a syscall, passed in r3-r8. + +Return value +------------ +- For the sc instruction, both a value and an error condition are returned. + cr0.SO is the error condition, and r3 is the return value. When cr0.SO is + clear, the syscall succeeded and r3 is the return value. When cr0.SO is set, + the syscall failed and r3 is the error value (that normally corresponds to + errno). + +- For the scv 0 instruction, the return value indicates failure if it is + -4095..-1 (i.e., it is >= -MAX_ERRNO (-4095) as an unsigned comparison), + in which case the error value is the negated return value. + +Stack +----- +System calls do not modify the caller's stack frame. For example, the caller's +stack frame LR and CR save fields are not used. + +Register preservation rules +--------------------------- +Register preservation rules match the ELF ABI calling sequence with some +differences. + +For the sc instruction, the differences from the ELF ABI are as follows: + ++--------------+--------------------+-----------------------------------------+ +| Register | Preservation Rules | Purpose | ++==============+====================+=========================================+ +| r0 | Volatile | (System call number.) | ++--------------+--------------------+-----------------------------------------+ +| r3 | Volatile | (Parameter 1, and return value.) 
| ++--------------+--------------------+-----------------------------------------+ +| r4-r8 | Volatile | (Parameters 2-6.) | ++--------------+--------------------+-----------------------------------------+ +| cr0 | Volatile | (cr0.SO is the return error condition.) | ++--------------+--------------------+-----------------------------------------+ +| cr1, cr5-7 | Nonvolatile | | ++--------------+--------------------+-----------------------------------------+ +| lr | Nonvolatile | | ++--------------+--------------------+-----------------------------------------+ + +For the scv 0 instruction, the differences from the ELF ABI are as follows: + ++--------------+--------------------+-----------------------------------------+ +| Register | Preservation Rules | Purpose | ++==============+====================+=========================================+ +| r0 | Volatile | (System call number.) | ++--------------+--------------------+-----------------------------------------+ +| r3 | Volatile | (Parameter 1, and return value.) | ++--------------+--------------------+-----------------------------------------+ +| r4-r8 | Volatile | (Parameters 2-6.) | ++--------------+--------------------+-----------------------------------------+ + +All floating point and vector data registers as well as control and status +registers are nonvolatile. + +Transactional Memory +-------------------- +Syscall behavior can change if the processor is in transactional or suspended +transaction state, and the syscall can affect the behavior of the transaction. + +If the processor is in suspended state when a syscall is made, the syscall +will be performed as normal, and will return as normal. The syscall will be +performed in suspended state, so its side effects will be persistent according +to the usual transactional memory semantics. A syscall may or may not result +in the transaction being doomed by hardware. 
+ +If the processor is in transactional state when a syscall is made, then the +behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF +auxiliary vector. + +- If present, which is the case for newer kernels, then the syscall will not + be performed and the transaction will be doomed by the kernel with the + failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR. + +- If not present (older kernels), then the kernel will suspend the + transactional state and the syscall will proceed as in the case of a + suspended state syscall, and will resume the transactional state before + returning to the caller. This case is not well defined or supported, so this + behavior should not be relied upon. + +scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. + +ptrace +------ +When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains +the system call type that can be used to distinguish between sc and scv 0 +system calls, and the different register conventions can be accounted for. + +If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was +performed with the sc instruction, if it is 0x3000 then the system call was +performed with the scv 0 instruction. + +vsyscall +======== + +vsyscall calling sequence matches the syscall calling sequence, with the +following differences. Some vsyscalls may have different calling sequences. + +Parameters and return value +--------------------------- +r0 is not used as an input. The vsyscall is selected by its address. + +Stack +----- +The vsyscall may or may not use the caller's stack frame save areas. + +Register preservation rules +--------------------------- + +=========== ======== +r0 Volatile +cr1, cr5-7 Volatile +lr Volatile +=========== ======== + +Invocation +---------- +The vsyscall is performed with a branch-with-link instruction to the vsyscall +function address. 
+ +Transactional Memory +-------------------- +vsyscalls will run in the same transactional state as the caller. A vsyscall +may or may not result in the transaction being doomed by hardware. diff --git a/Documentation/arch/powerpc/transactional_memory.rst b/Documentation/arch/powerpc/transactional_memory.rst new file mode 100644 index 0000000000..040a20675f --- /dev/null +++ b/Documentation/arch/powerpc/transactional_memory.rst @@ -0,0 +1,274 @@ +============================ +Transactional Memory support +============================ + +POWER kernel support for this feature is currently limited to supporting +its use by user programs. It is not currently used by the kernel itself. + +This file aims to sum up how it is supported by Linux and what behaviour you +can expect from your user programs. + + +Basic overview +============== + +Hardware Transactional Memory is supported on POWER8 processors, and is a +feature that enables a different form of atomic memory access. Several new +instructions are presented to delimit transactions; transactions are +guaranteed to either complete atomically or roll back and undo any partial +changes. + +A simple transaction looks like this:: + + begin_move_money: + tbegin + beq abort_handler + + ld r4, SAVINGS_ACCT(r3) + ld r5, CURRENT_ACCT(r3) + subi r5, r5, 1 + addi r4, r4, 1 + std r4, SAVINGS_ACCT(r3) + std r5, CURRENT_ACCT(r3) + + tend + + b continue + + abort_handler: + ... test for odd failures ... + + /* Retry the transaction if it failed because it conflicted with + * someone else: */ + b begin_move_money + + +The 'tbegin' instruction denotes the start point, and 'tend' the end point. +Between these points the processor is in 'Transactional' state; any memory +references will complete in one go if there are no conflicts with other +transactional or non-transactional accesses within the system. 
In this +example, the transaction completes as though it were normal straight-line code +IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an +atomic move of money from the current account to the savings account has been +performed. Even though the normal ld/std instructions are used (note no +lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be +updated, or neither will be updated. + +If, in the meantime, there is a conflict with the locations accessed by the +transaction, the transaction will be aborted by the CPU. Register and memory +state will roll back to that at the 'tbegin', and control will continue from +'tbegin+4'. The branch to abort_handler will be taken this second time; the +abort handler can check the cause of the failure, and retry. + +Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR +and a few other status/flag regs; see the ISA for details. + +Causes of transaction aborts +============================ + +- Conflicts with cache lines used by other processors +- Signals +- Context switches +- See the ISA for full documentation of everything that will abort transactions. + + +Syscalls +======== + +Syscalls made from within an active transaction will not be performed and the +transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL +| TM_CAUSE_PERSISTENT. + +Syscalls made from within a suspended transaction are performed as normal and +the transaction is not explicitly doomed by the kernel. However, what the +kernel does to perform the syscall may result in the transaction being doomed +by the hardware. The syscall is performed in suspended mode so any side +effects will be persistent, independent of transaction success or failure. No +guarantees are provided by the kernel about which syscalls will affect +transaction success. + +Care must be taken when relying on syscalls to abort during active transactions +if the calls are made via a library. 
Libraries may cache values (which may +give the appearance of success) or perform operations that cause transaction +failure before entering the kernel (which may produce different failure codes). +Examples are glibc's getpid() and lazy symbol resolution. + + +Signals +======= + +Delivery of signals (both sync and async) during transactions provides a second +thread state (ucontext/mcontext) to represent the second transactional register +state. Signal delivery 'treclaim's to capture both register states, so signals +abort transactions. The usual ucontext_t passed to the signal handler +represents the checkpointed/original register state; the signal appears to have +arisen at 'tbegin+4'. + +If the sighandler ucontext has uc_link set, a second ucontext has been +delivered. For future compatibility the MSR.TS field should be checked to +determine the transactional state -- if so, the second ucontext in uc->uc_link +represents the active transactional registers at the point of the signal. + +For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS +field shows the transactional mode. + +For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32 +bits are stored in the MSR of the second ucontext, i.e. in +uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional +state TS. + +However, basic signal handlers don't need to be aware of transactions +and simply returning from the handler will deal with things correctly. + +Transaction-aware signal handlers can read the transactional register state +from the second ucontext. This will be necessary for crash handlers to +determine, for example, the address of the instruction causing the SIGSEGV. + +Example signal handler:: + + void crash_handler(int sig, siginfo_t *si, void *uc) + { + ucontext_t *ucp = uc; + ucontext_t *transactional_ucp = ucp->uc_link; + + if (transactional_ucp) { + u64 msr = ucp->uc_mcontext.regs->msr; + /* May have transactional ucontext! 
*/ + #ifndef __powerpc64__ + msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32; + #endif + if (MSR_TM_ACTIVE(msr)) { + /* Yes, we crashed during a transaction. Oops. */ + fprintf(stderr, "Transaction to be restarted at 0x%llx, but " + "crashy instruction was at 0x%llx\n", + ucp->uc_mcontext.regs->nip, + transactional_ucp->uc_mcontext.regs->nip); + } + } + + fix_the_problem(ucp->dar); + } + +When in an active transaction that takes a signal, we need to be careful with +the stack. It's possible that the stack has moved back up after the tbegin. +The obvious case here is when the tbegin is called inside a function that +returns before a tend. In this case, the stack is part of the checkpointed +transactional memory state. If we write over this non transactionally or in +suspend, we are in trouble because if we get a tm abort, the program counter and +stack pointer will be back at the tbegin but our in memory stack won't be valid +anymore. + +To avoid this, when taking a signal in an active transaction, we need to use +the stack pointer from the checkpointed state, rather than the speculated +state. This ensures that the signal context (written tm suspended) will be +written below the stack required for the rollback. The transaction is aborted +because of the treclaim, so any memory written between the tbegin and the +signal will be rolled back anyway. + +For signals taken in non-TM or suspended mode, we use the +normal/non-checkpointed stack pointer. + +Any transaction initiated inside a sighandler and suspended on return +from the sighandler to the kernel will get reclaimed and discarded. + +Failure cause codes used by kernel +================================== + +These are defined in <asm/reg.h>, and distinguish different reasons why the +kernel aborted a transaction: + + ====================== ================================ + TM_CAUSE_RESCHED Thread was rescheduled. + TM_CAUSE_TLBI Software TLB invalid. + TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. 
+ TM_CAUSE_SYSCALL Syscall from active transaction. + TM_CAUSE_SIGNAL Signal delivered. + TM_CAUSE_MISC Currently unused. + TM_CAUSE_ALIGNMENT Alignment fault. + TM_CAUSE_EMULATE Emulation that touched memory. + ====================== ================================ + +These can be checked by the user program's abort handler as TEXASR[0:7]. If +bit 7 is set, it indicates that the error is considered persistent. For example +a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not. + +GDB +=== + +GDB and ptrace are not currently TM-aware. If one stops during a transaction, +it looks like the transaction has just started (the checkpointed state is +presented). The transaction cannot then be continued and will take the failure +handler route. Furthermore, the transactional 2nd register state will be +inaccessible. GDB can currently be used on programs using TM, but not sensibly +in parts within transactions. + +POWER9 +====== + +TM on POWER9 has issues with storing the complete register state. This +is described in this commit:: + + commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7 + Author: Paul Mackerras + Date: Wed Mar 21 21:32:01 2018 +1100 + KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9 + +To account for this different POWER9 chips have TM enabled in +different ways. + +On POWER9N DD2.01 and below, TM is disabled. ie +HWCAP2[PPC_FEATURE2_HTM] is not set. + +On POWER9N DD2.1 TM is configured by firmware to always abort a +transaction when tm suspend occurs. So tsuspend will cause a +transaction to be aborted and rolled back. Kernel exceptions will also +cause the transaction to be aborted and rolled back and the exception +will not occur. If userspace constructs a sigcontext that enables TM +suspend, the sigcontext will be rejected by the kernel. This mode is +advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set. +HWCAP2[PPC_FEATURE2_HTM] is not set in this mode. 
+On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as +described in commit 4bb3c7a0208f), hence TM is enabled for guests +ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that +make heavy use of TM suspend (tsuspend or kernel suspend) will result +in traps into the hypervisor and hence will suffer a performance +degradation. Host userspace has TM disabled +ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we may enable it +at some point in the future if we bring the emulation into host +userspace context switching). + +POWER9C DD1.2 and above are only available with POWERVM and hence +Linux only runs as a guest. On these systems TM is emulated like on +POWER9N DD2.2. + +Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and +POWER9C DD1.2. Since earlier POWER9 processors don't support TM +emulation, migration from POWER8 to POWER9 is not supported there. + +Kernel implementation +===================== + +h/rfid mtmsrd quirk +------------------- + +As defined in the ISA, rfid has a quirk which is useful in early +exception handling. When in a userspace transaction and we enter the +kernel via some exception, MSR will end up as TM=0 and TS=01 (ie. TM +off but TM suspended). Regularly the kernel will want to change bits in +the MSR and will perform an rfid to do this. In this case rfid can +have SRR1 TM = 0 and TS = 00 (ie. TM off and non transaction) and the +resulting MSR will retain TM = 0 and TS=01 from before (ie. stay in +suspend). This is a quirk in the architecture as this would normally +be a transition from TS=01 to TS=00 (ie. suspend -> non transactional) +which is an illegal transition. + +This quirk is described in the architecture in the definition of rfid +with these lines: + + if (MSR 29:31 ¬ = 0b010 | SRR1 29:31 ¬ = 0b000) then + MSR 29:31 <- SRR1 29:31 + +hrfid and mtmsrd have the same quirk. + +The Linux kernel uses this quirk in its early exception handling. 
diff --git a/Documentation/arch/powerpc/ultravisor.rst b/Documentation/arch/powerpc/ultravisor.rst new file mode 100644 index 0000000000..ba6b1bf1cc --- /dev/null +++ b/Documentation/arch/powerpc/ultravisor.rst @@ -0,0 +1,1117 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _ultravisor: + +============================ +Protected Execution Facility +============================ + +.. contents:: + :depth: 3 + +Introduction +############ + + Protected Execution Facility (PEF) is an architectural change for + POWER 9 that enables Secure Virtual Machines (SVMs). DD2.3 chips + (PVR=0x004e1203) or greater will be PEF-capable. A new ISA release + will include the PEF RFC02487 changes. + + When enabled, PEF adds a new higher privileged mode, called Ultravisor + mode, to POWER architecture. Along with the new mode there is new + firmware called the Protected Execution Ultravisor (or Ultravisor + for short). Ultravisor mode is the highest privileged mode in POWER + architecture. + + +------------------+ + | Privilege States | + +==================+ + | Problem | + +------------------+ + | Supervisor | + +------------------+ + | Hypervisor | + +------------------+ + | Ultravisor | + +------------------+ + + PEF protects SVMs from the hypervisor, privileged users, and other + VMs in the system. SVMs are protected while at rest and can only be + executed by an authorized machine. All virtual machines utilize + hypervisor services. The Ultravisor filters calls between the SVMs + and the hypervisor to assure that information does not accidentally + leak. All hypercalls except H_RANDOM are reflected to the hypervisor. + H_RANDOM is not reflected to prevent the hypervisor from influencing + random values in the SVM. + + To support this there is a refactoring of the ownership of resources + in the CPU. Some of the resources which were previously hypervisor + privileged are now ultravisor privileged. 
+ +Hardware +======== + + The hardware changes include the following: + + * There is a new bit in the MSR that determines whether the current + process is running in secure mode, MSR(S) bit 41. MSR(S)=1, process + is in secure mode, MSR(s)=0 process is in normal mode. + + * The MSR(S) bit can only be set by the Ultravisor. + + * HRFID cannot be used to set the MSR(S) bit. If the hypervisor needs + to return to a SVM it must use an ultracall. It can determine if + the VM it is returning to is secure. + + * There is a new Ultravisor privileged register, SMFCTRL, which has an + enable/disable bit SMFCTRL(E). + + * The privilege of a process is now determined by three MSR bits, + MSR(S, HV, PR). In each of the tables below the modes are listed + from least privilege to highest privilege. The higher privilege + modes can access all the resources of the lower privilege modes. + + **Secure Mode MSR Settings** + + +---+---+---+---------------+ + | S | HV| PR|Privilege | + +===+===+===+===============+ + | 1 | 0 | 1 | Problem | + +---+---+---+---------------+ + | 1 | 0 | 0 | Privileged(OS)| + +---+---+---+---------------+ + | 1 | 1 | 0 | Ultravisor | + +---+---+---+---------------+ + | 1 | 1 | 1 | Reserved | + +---+---+---+---------------+ + + **Normal Mode MSR Settings** + + +---+---+---+---------------+ + | S | HV| PR|Privilege | + +===+===+===+===============+ + | 0 | 0 | 1 | Problem | + +---+---+---+---------------+ + | 0 | 0 | 0 | Privileged(OS)| + +---+---+---+---------------+ + | 0 | 1 | 0 | Hypervisor | + +---+---+---+---------------+ + | 0 | 1 | 1 | Problem (Host)| + +---+---+---+---------------+ + + * Memory is partitioned into secure and normal memory. Only processes + that are running in secure mode can access secure memory. + + * The hardware does not allow anything that is not running secure to + access secure memory. This means that the Hypervisor cannot access + the memory of the SVM without using an ultracall (asking the + Ultravisor). 
The Ultravisor will only allow the hypervisor to see + the SVM memory encrypted. + + * I/O systems are not allowed to directly address secure memory. This + limits the SVMs to virtual I/O only. + + * The architecture allows the SVM to share pages of memory with the + hypervisor that are not protected with encryption. However, this + sharing must be initiated by the SVM. + + * When a process is running in secure mode all hypercalls + (syscall lev=1) go to the Ultravisor. + + * When a process is in secure mode all interrupts go to the + Ultravisor. + + * The following resources have become Ultravisor privileged and + require an Ultravisor interface to manipulate: + + * Processor configuration registers (SCOMs). + + * Stop state information. + + * The debug registers CIABR, DAWR, and DAWRX when SMFCTRL(D) is set. + If SMFCTRL(D) is not set they do not work in secure mode. When set, + reading and writing requires an Ultravisor call, otherwise that + will cause a Hypervisor Emulation Assistance interrupt. + + * PTCR and partition table entries (partition table is in secure + memory). An attempt to write to PTCR will cause a Hypervisor + Emulation Assistance interrupt. + + * LDBAR (LD Base Address Register) and IMC (In-Memory Collection) + non-architected registers. An attempt to write to them will cause a + Hypervisor Emulation Assistance interrupt. + + * Paging for an SVM, sharing of memory with Hypervisor for an SVM. + (Including Virtual Processor Area (VPA) and virtual I/O). + + +Software/Microcode +================== + + The software changes include: + + * SVMs are created from normal VMs using (open source) tooling supplied + by IBM. + + * All SVMs start as normal VMs and utilize an ultracall, UV_ESM + (Enter Secure Mode), to make the transition. + + * When the UV_ESM ultracall is made the Ultravisor copies the VM into + secure memory, decrypts the verification information, and checks the + integrity of the SVM. 
If the integrity check passes the Ultravisor + passes control in secure mode. + + * The verification information includes the pass phrase for the + encrypted disk associated with the SVM. This pass phrase is given + to the SVM when requested. + + * The Ultravisor is not involved in protecting the encrypted disk of + the SVM while at rest. + + * For external interrupts the Ultravisor saves the state of the SVM, + and reflects the interrupt to the hypervisor for processing. + For hypercalls, the Ultravisor inserts neutral state into all + registers not needed for the hypercall then reflects the call to + the hypervisor for processing. The H_RANDOM hypercall is performed + by the Ultravisor and not reflected. + + * For virtual I/O to work bounce buffering must be done. + + * The Ultravisor uses AES (IAPM) for protection of SVM memory. IAPM + is a mode of AES that provides integrity and secrecy concurrently. + + * The movement of data between normal and secure pages is coordinated + with the Ultravisor by a new HMM plug-in in the Hypervisor. + + The Ultravisor offers new services to the hypervisor and SVMs. These + are accessed through ultracalls. + +Terminology +=========== + + * Hypercalls: special system calls used to request services from + Hypervisor. + + * Normal memory: Memory that is accessible to Hypervisor. + + * Normal page: Page backed by normal memory and available to + Hypervisor. + + * Shared page: A page backed by normal memory and available to both + the Hypervisor/QEMU and the SVM (i.e page has mappings in SVM and + Hypervisor/QEMU). + + * Secure memory: Memory that is accessible only to Ultravisor and + SVMs. + + * Secure page: Page backed by secure memory and only available to + Ultravisor and SVM. + + * SVM: Secure Virtual Machine. + + * Ultracalls: special system calls used to request services from + Ultravisor. 
+ + +Ultravisor calls API +#################### + + This section describes Ultravisor calls (ultracalls) needed to + support Secure Virtual Machines (SVM)s and Paravirtualized KVM. The + ultracalls allow the SVMs and Hypervisor to request services from the + Ultravisor such as accessing a register or memory region that can only + be accessed when running in Ultravisor-privileged mode. + + The specific service needed from an ultracall is specified in register + R3 (the first parameter to the ultracall). Other parameters to the + ultracall, if any, are specified in registers R4 through R12. + + Return value of all ultracalls is in register R3. Other output values + from the ultracall, if any, are returned in registers R4 through R12. + The only exception to this register usage is the ``UV_RETURN`` + ultracall described below. + + Each ultracall returns specific error codes, applicable in the context + of the ultracall. However, like with the PowerPC Architecture Platform + Reference (PAPR), if no specific error code is defined for a + particular situation, then the ultracall will fallback to an erroneous + parameter-position based code. i.e U_PARAMETER, U_P2, U_P3 etc + depending on the ultracall parameter that may have caused the error. + + Some ultracalls involve transferring a page of data between Ultravisor + and Hypervisor. Secure pages that are transferred from secure memory + to normal memory may be encrypted using dynamically generated keys. + When the secure pages are transferred back to secure memory, they may + be decrypted using the same dynamically generated keys. Generation and + management of these keys will be covered in a separate document. + + For now this only covers ultracalls currently implemented and being + used by Hypervisor and SVMs but others can be added here when it + makes sense. + + The full specification for all hypercalls/ultracalls will eventually + be made available in the public/OpenPower version of the PAPR + specification. + + .. 
note:: + + If PEF is not enabled, the ultracalls will be redirected to the + Hypervisor which must handle/fail the calls. + +Ultracalls used by Hypervisor +============================= + + This section describes the virtual memory management ultracalls used + by the Hypervisor to manage SVMs. + +UV_PAGE_OUT +----------- + + Encrypt and move the contents of a page from secure memory to normal + memory. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_PAGE_OUT, + uint16_t lpid, /* LPAR ID */ + uint64_t dest_ra, /* real address of destination page */ + uint64_t src_gpa, /* source guest-physical-address */ + uint8_t flags, /* flags */ + uint64_t order) /* page size order */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``dest_ra`` is invalid. + * U_P3 if the ``src_gpa`` address is invalid. + * U_P4 if any bit in the ``flags`` is unrecognized + * U_P5 if the ``order`` parameter is unsupported. + * U_FUNCTION if functionality is not supported. + * U_BUSY if page cannot be currently paged-out. + +Description +~~~~~~~~~~~ + + Encrypt the contents of a secure-page and make it available to + Hypervisor in a normal page. + + By default, the source page is unmapped from the SVM's partition- + scoped page table. But the Hypervisor can provide a hint to the + Ultravisor to retain the page mapping by setting the ``UV_SNAPSHOT`` + flag in ``flags`` parameter. + + If the source page is already a shared page the call returns + U_SUCCESS, without doing anything. + +Use cases +~~~~~~~~~ + + #. QEMU attempts to access an address belonging to the SVM but the + page frame for that address is not mapped into QEMU's address + space. In this case, the Hypervisor will allocate a page frame, + map it into QEMU's address space and issue the ``UV_PAGE_OUT`` + call to retrieve the encrypted contents of the page. + + #. 
When Ultravisor runs low on secure memory and it needs to page-out + an LRU page. In this case, Ultravisor will issue the + ``H_SVM_PAGE_OUT`` hypercall to the Hypervisor. The Hypervisor will + then allocate a normal page and issue the ``UV_PAGE_OUT`` ultracall + and the Ultravisor will encrypt and move the contents of the secure + page into the normal page. + + #. When Hypervisor accesses SVM data, the Hypervisor requests the + Ultravisor to transfer the corresponding page into an insecure page, + which the Hypervisor can access. The data in the normal page will + be encrypted though. + +UV_PAGE_IN +---------- + + Move the contents of a page from normal memory to secure memory. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_PAGE_IN, + uint16_t lpid, /* the LPAR ID */ + uint64_t src_ra, /* source real address of page */ + uint64_t dest_gpa, /* destination guest physical address */ + uint64_t flags, /* flags */ + uint64_t order) /* page size order */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_BUSY if page cannot be currently paged-in. + * U_FUNCTION if functionality is not supported + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``src_ra`` is invalid. + * U_P3 if the ``dest_gpa`` address is invalid. + * U_P4 if any bit in the ``flags`` is unrecognized + * U_P5 if the ``order`` parameter is unsupported. + +Description +~~~~~~~~~~~ + + Move the contents of the page identified by ``src_ra`` from normal + memory to secure memory and map it to the guest physical address + ``dest_gpa``. + + If ``dest_gpa`` refers to a shared address, map the page into the + partition-scoped page-table of the SVM. If ``dest_gpa`` is not shared, + copy the contents of the page into the corresponding secure page. + Depending on the context, decrypt the page before being copied. + + The caller provides the attributes of the page through the ``flags`` + parameter.
Valid values for ``flags`` are: + + * CACHE_INHIBITED + * CACHE_ENABLED + * WRITE_PROTECTION + + The Hypervisor must pin the page in memory before making + ``UV_PAGE_IN`` ultracall. + +Use cases +~~~~~~~~~ + + #. When a normal VM switches to secure mode, all its pages residing + in normal memory, are moved into secure memory. + + #. When an SVM requests to share a page with Hypervisor the Hypervisor + allocates a page and informs the Ultravisor. + + #. When an SVM accesses a secure page that has been paged-out, + Ultravisor invokes the Hypervisor to locate the page. After + locating the page, the Hypervisor uses UV_PAGE_IN to make the + page available to Ultravisor. + +UV_PAGE_INVAL +------------- + + Invalidate the Ultravisor mapping of a page. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_PAGE_INVAL, + uint16_t lpid, /* the LPAR ID */ + uint64_t guest_pa, /* destination guest-physical-address */ + uint64_t order) /* page size order */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``guest_pa`` is invalid (or corresponds to a secure + page mapping). + * U_P3 if the ``order`` is invalid. + * U_FUNCTION if functionality is not supported. + * U_BUSY if page cannot be currently invalidated. + +Description +~~~~~~~~~~~ + + This ultracall informs Ultravisor that the page mapping in Hypervisor + corresponding to the given guest physical address has been invalidated + and that the Ultravisor should not access the page. If the specified + ``guest_pa`` corresponds to a secure page, Ultravisor will ignore the + attempt to invalidate the page and return U_P2. + +Use cases +~~~~~~~~~ + + #. When a shared page is unmapped from the QEMU's page table, possibly + because it is paged-out to disk, Ultravisor needs to know that the + page should not be accessed from its side too. 
+ + +UV_WRITE_PATE +------------- + + Validate and write the partition table entry (PATE) for a given + partition. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_WRITE_PATE, + uint32_t lpid, /* the LPAR ID */ + uint64_t dw0, /* the first double word to write */ + uint64_t dw1) /* the second double word to write */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_BUSY if PATE cannot be currently written to. + * U_FUNCTION if functionality is not supported. + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``dw0`` is invalid. + * U_P3 if the ``dw1`` address is invalid. + * U_PERMISSION if the Hypervisor is attempting to change the PATE + of a secure virtual machine or if called from a + context other than Hypervisor. + +Description +~~~~~~~~~~~ + + Validate and write a LPID and its partition-table-entry for the given + LPID. If the LPID is already allocated and initialized, this call + results in changing the partition table entry. + +Use cases +~~~~~~~~~ + + #. The Partition table resides in Secure memory and its entries, + called PATE (Partition Table Entries), point to the partition- + scoped page tables for the Hypervisor as well as each of the + virtual machines (both secure and normal). The Hypervisor + operates in partition 0 and its partition-scoped page tables + reside in normal memory. + + #. This ultracall allows the Hypervisor to register the partition- + scoped and process-scoped page table entries for the Hypervisor + and other partitions (virtual machines) with the Ultravisor. + + #. If the value of the PATE for an existing partition (VM) changes, + the TLB cache for the partition is flushed. + + #. The Hypervisor is responsible for allocating LPID. The LPID and + its PATE entry are registered together. The Hypervisor manages + the PATE entries for a normal VM and can change the PATE entry + anytime.
Ultravisor manages the PATE entries for an SVM and + Hypervisor is not allowed to modify them. + +UV_RETURN +--------- + + Return control from the Hypervisor back to the Ultravisor after + processing an hypercall or interrupt that was forwarded (aka + *reflected*) to the Hypervisor. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_RETURN) + +Return values +~~~~~~~~~~~~~ + + This call never returns to Hypervisor on success. It returns + U_INVALID if ultracall is not made from a Hypervisor context. + +Description +~~~~~~~~~~~ + + When an SVM makes an hypercall or incurs some other exception, the + Ultravisor usually forwards (aka *reflects*) the exceptions to the + Hypervisor. After processing the exception, Hypervisor uses the + ``UV_RETURN`` ultracall to return control back to the SVM. + + The expected register state on entry to this ultracall is: + + * Non-volatile registers are restored to their original values. + * If returning from an hypercall, register R0 contains the return + value (**unlike other ultracalls**) and, registers R4 through R12 + contain any output values of the hypercall. + * R3 contains the ultracall number, i.e UV_RETURN. + * If returning with a synthesized interrupt, R2 contains the + synthesized interrupt number. + +Use cases +~~~~~~~~~ + + #. Ultravisor relies on the Hypervisor to provide several services to + the SVM such as processing hypercall and other exceptions. After + processing the exception, Hypervisor uses UV_RETURN to return + control back to the Ultravisor. + + #. Hypervisor has to use this ultracall to return control to the SVM. + + +UV_REGISTER_MEM_SLOT +-------------------- + + Register an SVM address-range with specified properties. + +Syntax +~~~~~~ + +.. 
code-block:: c + + uint64_t ultracall(const uint64_t UV_REGISTER_MEM_SLOT, + uint64_t lpid, /* LPAR ID of the SVM */ + uint64_t start_gpa, /* start guest physical address */ + uint64_t size, /* size of address range in bytes */ + uint64_t flags, /* reserved for future expansion */ + uint16_t slotid) /* slot identifier */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``start_gpa`` is invalid. + * U_P3 if ``size`` is invalid. + * U_P4 if any bit in the ``flags`` is unrecognized. + * U_P5 if the ``slotid`` parameter is unsupported. + * U_PERMISSION if called from context other than Hypervisor. + * U_FUNCTION if functionality is not supported. + + +Description +~~~~~~~~~~~ + + Register a memory range for an SVM. The memory range starts at the + guest physical address ``start_gpa`` and is ``size`` bytes long. + +Use cases +~~~~~~~~~ + + + #. When a virtual machine goes secure, all the memory slots managed by + the Hypervisor move into secure memory. The Hypervisor iterates + through each of the memory slots, and registers the slot with + Ultravisor. Hypervisor may discard some slots such as those used + for firmware (SLOF). + + #. When new memory is hot-plugged, a new memory slot gets registered. + + +UV_UNREGISTER_MEM_SLOT +---------------------- + + Unregister an SVM address-range that was previously registered using + UV_REGISTER_MEM_SLOT. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_UNREGISTER_MEM_SLOT, + uint64_t lpid, /* LPAR ID of the SVM */ + uint64_t slotid) /* reservation slotid */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_FUNCTION if functionality is not supported. + * U_PARAMETER if ``lpid`` is invalid. + * U_P2 if ``slotid`` is invalid. + * U_PERMISSION if called from context other than Hypervisor.
+ +Description +~~~~~~~~~~~ + + Release the memory slot identified by ``slotid`` and free any + resources allocated towards the reservation. + +Use cases +~~~~~~~~~ + + #. Memory hot-remove. + + +UV_SVM_TERMINATE +---------------- + + Terminate an SVM and release its resources. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_SVM_TERMINATE, + uint64_t lpid) /* LPAR ID of the SVM */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_FUNCTION if functionality is not supported. + * U_PARAMETER if ``lpid`` is invalid. + * U_INVALID if VM is not secure. + * U_PERMISSION if not called from a Hypervisor context. + +Description +~~~~~~~~~~~ + + Terminate an SVM and release all its resources. + +Use cases +~~~~~~~~~ + + #. Called by Hypervisor when terminating an SVM. + + +Ultracalls used by SVM +====================== + +UV_SHARE_PAGE +------------- + + Share a set of guest physical pages with the Hypervisor. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_SHARE_PAGE, + uint64_t gfn, /* guest page frame number */ + uint64_t num) /* number of pages of size PAGE_SIZE */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_FUNCTION if functionality is not supported. + * U_INVALID if the VM is not secure. + * U_PARAMETER if ``gfn`` is invalid. + * U_P2 if ``num`` is invalid. + +Description +~~~~~~~~~~~ + + Share the ``num`` pages starting at guest physical frame number ``gfn`` + with the Hypervisor. Assume page size is PAGE_SIZE bytes. Zero the + pages before returning. + + If the address is already backed by a secure page, unmap the page and + back it with an insecure page, with the help of the Hypervisor. If it + is not backed by any page yet, mark the PTE as insecure and back it + with an insecure page when the address is accessed. If it is already + backed by an insecure page, zero the page and return.
+ +Use cases +~~~~~~~~~ + + #. The Hypervisor cannot access the SVM pages since they are backed by + secure pages. Hence an SVM must explicitly request Ultravisor for + pages it can share with Hypervisor. + + #. Shared pages are needed to support virtio and Virtual Processor Area + (VPA) in SVMs. + + +UV_UNSHARE_PAGE +--------------- + + Restore a shared SVM page to its initial state. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_UNSHARE_PAGE, + uint64_t gfn, /* guest page frame number */ + uint64_t num) /* number of pages of size PAGE_SIZE*/ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_FUNCTION if functionality is not supported. + * U_INVALID if VM is not secure. + * U_PARAMETER if ``gfn`` is invalid. + * U_P2 if ``num`` is invalid. + +Description +~~~~~~~~~~~ + + Stop sharing ``num`` pages starting at ``gfn`` with the Hypervisor. + Assume that the page size is PAGE_SIZE. Zero the pages before + returning. + + If the address is already backed by an insecure page, unmap the page + and back it with a secure page. Inform the Hypervisor to release + reference to its shared page. If the address is not backed by a page + yet, mark the PTE as secure and back it with a secure page when that + address is accessed. If it is already backed by a secure page, zero + the page and return. + +Use cases +~~~~~~~~~ + + #. The SVM may decide to unshare a page from the Hypervisor. + + +UV_UNSHARE_ALL_PAGES +-------------------- + + Unshare all pages the SVM has shared with Hypervisor. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_UNSHARE_ALL_PAGES) + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success. + * U_FUNCTION if functionality is not supported. + * U_INVAL if VM is not secure. + +Description +~~~~~~~~~~~ + + Unshare all shared pages from the Hypervisor. All unshared pages are + zeroed on return.
Only pages explicitly shared by the SVM with the + Hypervisor (using UV_SHARE_PAGE ultracall) are unshared. Ultravisor + may internally share some pages with the Hypervisor without explicit + request from the SVM. These pages will not be unshared by this + ultracall. + +Use cases +~~~~~~~~~ + + #. This call is needed when ``kexec`` is used to boot a different + kernel. It may also be needed during SVM reset. + +UV_ESM +------ + + Secure the virtual machine (*enter secure mode*). + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t ultracall(const uint64_t UV_ESM, + uint64_t esm_blob_addr, /* location of the ESM blob */ + uint64_t fdt) /* Flattened device tree */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * U_SUCCESS on success (including if VM is already secure). + * U_FUNCTION if functionality is not supported. + * U_INVALID if VM is not secure. + * U_PARAMETER if ``esm_blob_addr`` is invalid. + * U_P2 if ``fdt`` is invalid. + * U_PERMISSION if any integrity checks fail. + * U_RETRY insufficient memory to create SVM. + * U_NO_KEY symmetric key unavailable. + +Description +~~~~~~~~~~~ + + Secure the virtual machine. On successful completion, return + control to the virtual machine at the address specified in the + ESM blob. + +Use cases +~~~~~~~~~ + + #. A normal virtual machine can choose to switch to a secure mode. + +Hypervisor Calls API +#################### + + This document describes the Hypervisor calls (hypercalls) that are + needed to support the Ultravisor. Hypercalls are services provided by + the Hypervisor to virtual machines and Ultravisor. + + Register usage for these hypercalls is identical to that of the other + hypercalls defined in the Power Architecture Platform Reference (PAPR) + document. i.e on input, register R3 identifies the specific service + that is being requested and registers R4 through R11 contain + additional parameters to the hypercall, if any.
On output, register + R3 contains the return value and registers R4 through R9 contain any + other output values from the hypercall. + + This document only covers hypercalls currently implemented/planned + for Ultravisor usage but others can be added here when it makes sense. + + The full specification for all hypercalls/ultracalls will eventually + be made available in the public/OpenPower version of the PAPR + specification. + +Hypervisor calls to support Ultravisor +====================================== + + Following are the set of hypercalls needed to support Ultravisor. + +H_SVM_INIT_START +---------------- + + Begin the process of converting a normal virtual machine into an SVM. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t hypercall(const uint64_t H_SVM_INIT_START) + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * H_SUCCESS on success. + * H_STATE if the VM is not in a position to switch to secure. + +Description +~~~~~~~~~~~ + + Initiate the process of securing a virtual machine. This involves + coordinating with the Ultravisor, using ultracalls, to allocate + resources in the Ultravisor for the new SVM, transferring the VM's + pages from normal to secure memory etc. When the process is + completed, Ultravisor issues the H_SVM_INIT_DONE hypercall. + +Use cases +~~~~~~~~~ + + #. Ultravisor uses this hypercall to inform Hypervisor that a VM + has initiated the process of switching to secure mode. + + +H_SVM_INIT_DONE +--------------- + + Complete the process of securing an SVM. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t hypercall(const uint64_t H_SVM_INIT_DONE) + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * H_SUCCESS on success. + * H_UNSUPPORTED if called from the wrong context (e.g. + from an SVM or before an H_SVM_INIT_START + hypercall). + * H_STATE if the hypervisor could not successfully + transition the VM to Secure VM. 
+ +Description +~~~~~~~~~~~ + + Complete the process of securing a virtual machine. This call must + be made after a prior call to ``H_SVM_INIT_START`` hypercall. + +Use cases +~~~~~~~~~ + + On successfully securing a virtual machine, the Ultravisor informs + Hypervisor about it. Hypervisor can use this call to finish setting + up its internal state for this virtual machine. + + +H_SVM_INIT_ABORT +---------------- + + Abort the process of securing an SVM. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t hypercall(const uint64_t H_SVM_INIT_ABORT) + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * H_PARAMETER on successfully cleaning up the state, + Hypervisor will return this value to the + **guest**, to indicate that the underlying + UV_ESM ultracall failed. + + * H_STATE if called after a VM has gone secure (i.e + H_SVM_INIT_DONE hypercall was successful). + + * H_UNSUPPORTED if called from a wrong context (e.g. from a + normal VM). + +Description +~~~~~~~~~~~ + + Abort the process of securing a virtual machine. This call must + be made after a prior call to ``H_SVM_INIT_START`` hypercall and + before a call to ``H_SVM_INIT_DONE``. + + On entry into this hypercall the non-volatile GPRs and FPRs are + expected to contain the values they had at the time the VM issued + the UV_ESM ultracall. Further ``SRR0`` is expected to contain the + address of the instruction after the ``UV_ESM`` ultracall and ``SRR1`` + the MSR value with which to return to the VM. + + This hypercall will cleanup any partial state that was established for + the VM since the prior ``H_SVM_INIT_START`` hypercall, including paging + out pages that were paged-into secure memory, and issue the + ``UV_SVM_TERMINATE`` ultracall to terminate the VM. + + After the partial state is cleaned up, control returns to the VM + (**not Ultravisor**), at the address specified in ``SRR0`` with the + MSR values set to the value in ``SRR1``. 
+ +Use cases +~~~~~~~~~ + + If after a successful call to ``H_SVM_INIT_START``, the Ultravisor + encounters an error while securing a virtual machine, either due + to lack of resources or because the VM's security information could + not be validated, Ultravisor informs the Hypervisor about it. + Hypervisor should use this call to clean up any internal state for + this virtual machine and return to the VM. + +H_SVM_PAGE_IN +------------- + + Move the contents of a page from normal memory to secure memory. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t hypercall(const uint64_t H_SVM_PAGE_IN, + uint64_t guest_pa, /* guest-physical-address */ + uint64_t flags, /* flags */ + uint64_t order) /* page size order */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * H_SUCCESS on success. + * H_PARAMETER if ``guest_pa`` is invalid. + * H_P2 if ``flags`` is invalid. + * H_P3 if ``order`` of page is invalid. + +Description +~~~~~~~~~~~ + + Retrieve the content of the page, belonging to the VM at the specified + guest physical address. + + Only valid value(s) in ``flags`` are: + + * H_PAGE_IN_SHARED which indicates that the page is to be shared + with the Ultravisor. + + * H_PAGE_IN_NONSHARED indicates that the UV is not anymore + interested in the page. Applicable if the page is a shared page. + + The ``order`` parameter must correspond to the configured page size. + +Use cases +~~~~~~~~~ + + #. When a normal VM becomes a secure VM (using the UV_ESM ultracall), + the Ultravisor uses this hypercall to move contents of each page of + the VM from normal memory to secure memory. + + #. Ultravisor uses this hypercall to ask Hypervisor to provide a page + in normal memory that can be shared between the SVM and Hypervisor. + + #. Ultravisor uses this hypercall to page-in a paged-out page. This + can happen when the SVM touches a paged-out page. + + #. If SVM wants to disable sharing of pages with Hypervisor, it can + inform Ultravisor to do so. 
Ultravisor will then use this hypercall + and inform Hypervisor that it has released access to the normal + page. + +H_SVM_PAGE_OUT +--------------- + + Move the contents of the page to normal memory. + +Syntax +~~~~~~ + +.. code-block:: c + + uint64_t hypercall(const uint64_t H_SVM_PAGE_OUT, + uint64_t guest_pa, /* guest-physical-address */ + uint64_t flags, /* flags (currently none) */ + uint64_t order) /* page size order */ + +Return values +~~~~~~~~~~~~~ + + One of the following values: + + * H_SUCCESS on success. + * H_PARAMETER if ``guest_pa`` is invalid. + * H_P2 if ``flags`` is invalid. + * H_P3 if ``order`` is invalid. + +Description +~~~~~~~~~~~ + + Move the contents of the page identified by ``guest_pa`` to normal + memory. + + Currently ``flags`` is unused and must be set to 0. The ``order`` + parameter must correspond to the configured page size. + +Use cases +~~~~~~~~~ + + #. If Ultravisor is running low on secure pages, it can move the + contents of some secure pages, into normal pages using this + hypercall. The content will be encrypted. + +References +########## + +- `Supporting Protected Computing on IBM Power Architecture `_ diff --git a/Documentation/arch/powerpc/vas-api.rst b/Documentation/arch/powerpc/vas-api.rst new file mode 100644 index 0000000000..a9625a2fa0 --- /dev/null +++ b/Documentation/arch/powerpc/vas-api.rst @@ -0,0 +1,305 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _VAS-API: + +=================================================== +Virtual Accelerator Switchboard (VAS) userspace API +=================================================== + +Introduction +============ + +Power9 processor introduced Virtual Accelerator Switchboard (VAS) which +allows both userspace and kernel communicate to co-processor +(hardware accelerator) referred to as the Nest Accelerator (NX). The NX +unit comprises of one or more hardware engines or co-processor types +such as 842 compression, GZIP compression and encryption. 
On power9, +userspace applications will have access to only GZIP Compression engine +which supports ZLIB and GZIP compression algorithms in the hardware. + +To communicate with NX, kernel has to establish a channel or window and +then requests can be submitted directly without kernel involvement. +Requests to the GZIP engine must be formatted as a co-processor Request +Block (CRB) and these CRBs must be submitted to the NX using COPY/PASTE +instructions to paste the CRB to hardware address that is associated with +the engine's request queue. + +The GZIP engine provides two priority levels of requests: Normal and +High. Only Normal requests are supported from userspace right now. + +This document explains userspace API that is used to interact with +kernel to setup channel / window which can be used to send compression +requests directly to NX accelerator. + + +Overview +======== + +Application access to the GZIP engine is provided through +/dev/crypto/nx-gzip device node implemented by the VAS/NX device driver. +An application must open the /dev/crypto/nx-gzip device to obtain a file +descriptor (fd). Then should issue VAS_TX_WIN_OPEN ioctl with this fd to +establish connection to the engine. It means send window is opened on GZIP +engine for this process. Once a connection is established, the application +should use the mmap() system call to map the hardware address of engine's +request queue into the application's virtual address space. + +The application can then submit one or more requests to the engine by +using copy/paste instructions and pasting the CRBs to the virtual address +(aka paste_address) returned by mmap(). User space can close the +established connection or send window by closing the file descriptor +(close(fd)) or upon the process exit. + +Note that applications can send several requests with the same window or +can establish multiple windows, but one window for each file descriptor. 
+ +Following sections provide additional details and references about the +individual steps. + +NX-GZIP Device Node +=================== + +There is one /dev/crypto/nx-gzip node in the system and it provides +access to all GZIP engines in the system. The only valid operations on +/dev/crypto/nx-gzip are: + + * open() the device for read and write. + * issue VAS_TX_WIN_OPEN ioctl + * mmap() the engine's request queue into application's virtual + address space (i.e. get a paste_address for the co-processor + engine). + * close the device node. + +Other file operations on this device node are undefined. + +Note that the copy and paste operations go directly to the hardware and +do not go through this device. Refer COPY/PASTE document for more +details. + +Although a system may have several instances of the NX co-processor +engines (typically, one per P9 chip) there is just one +/dev/crypto/nx-gzip device node in the system. When the nx-gzip device +node is opened, Kernel opens send window on a suitable instance of NX +accelerator. It finds the CPU on which the user process is executing and +determines the NX instance for the corresponding chip on which this CPU +belongs. + +Applications may choose a specific instance of the NX co-processor using +the vas_id field in the VAS_TX_WIN_OPEN ioctl as detailed below. + +A userspace library libnxz is available here but still in development: + + https://github.com/abalib/power-gzip + +Applications that use inflate / deflate calls can link with libnxz +instead of libz and use NX GZIP compression without any modification. + +Open /dev/crypto/nx-gzip +======================== + +The nx-gzip device should be opened for read and write. No special +privileges are needed to open the device. Each window corresponds to one +file descriptor. So if the userspace process needs multiple windows, +several open calls have to be issued. + +See open(2) system call man pages for other details such as return values, +error codes and restrictions.
+ +VAS_TX_WIN_OPEN ioctl +===================== + +Applications should use the VAS_TX_WIN_OPEN ioctl as follows to establish +a connection with NX co-processor engine: + + :: + + struct vas_tx_win_open_attr { + __u32 version; + __s16 vas_id; /* specific instance of vas or -1 + for default */ + __u16 reserved1; + __u64 flags; /* For future use */ + __u64 reserved2[6]; + }; + + version: + The version field must be currently set to 1. + vas_id: + If '-1' is passed, kernel will make a best-effort attempt + to assign an optimal instance of NX for the process. To + select the specific VAS instance, refer + "Discovery of available VAS engines" section below. + + flags, reserved1 and reserved2[6] fields are for future extension + and must be set to 0. + + The attributes attr for the VAS_TX_WIN_OPEN ioctl are defined as + follows:: + + #define VAS_MAGIC 'v' + #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 1, + struct vas_tx_win_open_attr) + + struct vas_tx_win_open_attr attr; + rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + + The VAS_TX_WIN_OPEN ioctl returns 0 on success. On errors, it + returns -1 and sets the errno variable to indicate the error. + + Error conditions: + + ====== ================================================ + EINVAL fd does not refer to a valid VAS device. + EINVAL Invalid vas ID + EINVAL version is not set with proper value + EEXIST Window is already opened for the given fd + ENOMEM Memory is not available to allocate window + ENOSPC System has too many active windows (connections) + opened + EINVAL reserved fields are not set to 0. + ====== ================================================ + + See the ioctl(2) man page for more details, error codes and + restrictions. + +mmap() NX-GZIP device +===================== + +The mmap() system call for a NX-GZIP device fd returns a paste_address +that the application can use to copy/paste its CRB to the hardware engines. 
+ + :: + + paste_addr = mmap(addr, size, prot, flags, fd, offset); + + Only restrictions on mmap for a NX-GZIP device fd are: + + * size should be PAGE_SIZE + * offset parameter should be 0ULL + + Refer to mmap(2) man page for additional details/restrictions. + In addition to the error conditions listed on the mmap(2) man + page, can also fail with one of the following error codes: + + ====== ============================================= + EINVAL fd is not associated with an open window + (i.e mmap() does not follow a successful call + to the VAS_TX_WIN_OPEN ioctl). + EINVAL offset field is not 0ULL. + ====== ============================================= + +Discovery of available VAS engines +================================== + +Each available VAS instance in the system will have a device tree node +like /proc/device-tree/vas@* or /proc/device-tree/xscom@*/vas@*. +Determine the chip or VAS instance and use the corresponding ibm,vas-id +property value in this node to select specific VAS instance. + +Copy/Paste operations +===================== + +Applications should use the copy and paste instructions to send CRB to NX. +Refer section 4.4 in PowerISA for Copy/Paste instructions: +https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 + +CRB Specification and use NX +============================ + +Applications should format requests to the co-processor using the +co-processor Request Block (CRBs). Refer NX-GZIP user's manual for the format +of CRB and use NX from userspace such as sending requests and checking +request status. + +NX Fault handling +================= + +Applications send requests to NX and wait for the status by polling on +co-processor Status Block (CSB) flags. NX updates status in CSB after each +request is processed. Refer NX-GZIP user's manual for the format of CSB and +status flags. 
+ +If NX encounters a translation error (called NX page fault) on the CSB +address or any request buffer, it raises an interrupt on the CPU to handle the +fault. Page fault can happen if an application passes invalid addresses or +request buffers are not in memory. The operating system handles the fault by +updating CSB with the following data:: + + csb.flags = CSB_V; + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.address = fault_address; + +When an application receives a translation error, it can touch or access +the page that has a fault address so that this page will be in memory. Then +the application can resend this request to NX. + +If the OS cannot update the CSB due to an invalid CSB address, it sends a SEGV signal +to the process that opened the send window on which the original request was +issued. This signal returns with the following siginfo struct:: + + siginfo.si_signo = SIGSEGV; + siginfo.si_errno = EFAULT; + siginfo.si_code = SEGV_MAPERR; + siginfo.si_addr = CSB address; + +In the case of multi-thread applications, NX send windows can be shared +across all threads. For example, a child thread can open a send window, +but other threads can send requests to NX using this window. These +requests will be successful even in the case of OS handling faults as long +as CSB address is valid. If the NX request contains an invalid CSB address, +the signal will be sent to the child thread that opened the window. But if +the thread has exited without closing the window and the request is issued +using this window, the signal will be issued to the thread group leader +(tgid). It is up to the application whether to ignore or handle these +signals.
+ +NX-GZIP User's Manual: +https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf + +Simple example +============== + + :: + + int use_nx_gzip() + { + int rc, fd; + void *addr; + struct vas_setup_attr txattr; + + fd = open("/dev/crypto/nx-gzip", O_RDWR); + if (fd < 0) { + fprintf(stderr, "open nx-gzip failed\n"); + return -1; + } + memset(&txattr, 0, sizeof(txattr)); + txattr.version = 1; + txattr.vas_id = -1 + rc = ioctl(fd, VAS_TX_WIN_OPEN, + (unsigned long)&txattr); + if (rc < 0) { + fprintf(stderr, "ioctl() n %d, error %d\n", + rc, errno); + return rc; + } + addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0ULL); + if (addr == MAP_FAILED) { + fprintf(stderr, "mmap() failed, errno %d\n", + errno); + return -errno; + } + do { + //Format CRB request with compression or + //uncompression + // Refer tests for vas_copy/vas_paste + vas_copy((&crb, 0, 1); + vas_paste(addr, 0, 1); + // Poll on csb.flags with timeout + // csb address is listed in CRB + } while (true) + close(fd) or window can be closed upon process exit + } + + Refer https://github.com/libnxz/power-gzip for tests or more + use cases. diff --git a/Documentation/arch/powerpc/vcpudispatch_stats.rst b/Documentation/arch/powerpc/vcpudispatch_stats.rst new file mode 100644 index 0000000000..5704657a59 --- /dev/null +++ b/Documentation/arch/powerpc/vcpudispatch_stats.rst @@ -0,0 +1,75 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +VCPU Dispatch Statistics +======================== + +For Shared Processor LPARs, the POWER Hypervisor maintains a relatively +static mapping of the LPAR processors (vcpus) to physical processor +chips (representing the "home" node) and tries to always dispatch vcpus +on their associated physical processor chip. However, under certain +scenarios, vcpus may be dispatched on a different processor chip (away +from its home node). 
+ +/proc/powerpc/vcpudispatch_stats can be used to obtain statistics +related to the vcpu dispatch behavior. Writing '1' to this file enables +collecting the statistics, while writing '0' disables the statistics. +By default, the DTLB log for each vcpu is processed 50 times a second so +as not to miss any entries. This processing frequency can be changed +through /proc/powerpc/vcpudispatch_stats_freq. + +The statistics themselves are available by reading the procfs file +/proc/powerpc/vcpudispatch_stats. Each line in the output corresponds to +a vcpu as represented by the first field, followed by 8 numbers. + +The first number corresponds to: + +1. total vcpu dispatches since the beginning of statistics collection + +The next 4 numbers represent vcpu dispatch dispersions: + +2. number of times this vcpu was dispatched on the same processor as last + time +3. number of times this vcpu was dispatched on a different processor core + than last time, but within the same chip +4. number of times this vcpu was dispatched on a different chip +5. number of times this vcpu was dispatched on a different socket/drawer + (next numa boundary) + +The final 3 numbers represent statistics in relation to the home node of +the vcpu: + +6. number of times this vcpu was dispatched in its home node (chip) +7. number of times this vcpu was dispatched in a different node +8.
number of times this vcpu was dispatched in a node further away (numa + distance) + +An example output:: + + $ sudo cat /proc/powerpc/vcpudispatch_stats + cpu0 6839 4126 2683 30 0 6821 18 0 + cpu1 2515 1274 1229 12 0 2509 6 0 + cpu2 2317 1198 1109 10 0 2312 5 0 + cpu3 2259 1165 1088 6 0 2256 3 0 + cpu4 2205 1143 1056 6 0 2202 3 0 + cpu5 2165 1121 1038 6 0 2162 3 0 + cpu6 2183 1127 1050 6 0 2180 3 0 + cpu7 2193 1133 1052 8 0 2187 6 0 + cpu8 2165 1115 1032 18 0 2156 9 0 + cpu9 2301 1252 1033 16 0 2293 8 0 + cpu10 2197 1138 1041 18 0 2187 10 0 + cpu11 2273 1185 1062 26 0 2260 13 0 + cpu12 2186 1125 1043 18 0 2177 9 0 + cpu13 2161 1115 1030 16 0 2153 8 0 + cpu14 2206 1153 1033 20 0 2196 10 0 + cpu15 2163 1115 1032 16 0 2155 8 0 + +In the output above, for vcpu0, there have been 6839 dispatches since +statistics were enabled. 4126 of those dispatches were on the same +physical cpu as the last time. 2683 were on a different core, but within +the same chip, while 30 dispatches were on a different chip compared to +its last dispatch. + +Also, out of the total of 6839 dispatches, we see that there have been +6821 dispatches on the vcpu's home node, while 18 dispatches were +outside its home node, on a neighbouring chip. diff --git a/Documentation/arch/powerpc/vmemmap_dedup.rst b/Documentation/arch/powerpc/vmemmap_dedup.rst new file mode 100644 index 0000000000..dc4db59fdf --- /dev/null +++ b/Documentation/arch/powerpc/vmemmap_dedup.rst @@ -0,0 +1,101 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========== +Device DAX +========== + +The device-dax interface uses the tail deduplication technique explained in +Documentation/mm/vmemmap_dedup.rst + +On powerpc, vmemmap deduplication is only used with radix MMU translation. Also +with a 64K page size, only the devdax namespace with 1G alignment uses vmemmap +deduplication. + +With 2M PMD level mapping, we require 32 struct pages and a single 64K vmemmap +page can contain 1024 struct pages (64K/sizeof(struct page)). 
Hence there is no +vmemmap deduplication possible. + +With 1G PUD level mapping, we require 16384 struct pages and a single 64K +vmemmap page can contain 1024 struct pages (64K/sizeof(struct page)). Hence we +require 16 64K pages in vmemmap to map the struct page for 1G PUD level mapping. + +Here's how things look like on device-dax after the sections are populated:: + +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ + | | | 0 | -------------> | 0 | + | | +-----------+ +-----------+ + | | | 1 | -------------> | 1 | + | | +-----------+ +-----------+ + | | | 2 | ----------------^ ^ ^ ^ ^ ^ + | | +-----------+ | | | | | + | | | 3 | ------------------+ | | | | + | | +-----------+ | | | | + | | | 4 | --------------------+ | | | + | PUD | +-----------+ | | | + | level | | . | ----------------------+ | | + | mapping | +-----------+ | | + | | | . | ------------------------+ | + | | +-----------+ | + | | | 15 | --------------------------+ + | | +-----------+ + | | + | | + | | + +-----------+ + + +With 4K page size, 2M PMD level mapping requires 512 struct pages and a single +4K vmemmap page contains 64 struct pages(4K/sizeof(struct page)). Hence we +require 8 4K pages in vmemmap to map the struct page for 2M pmd level mapping. 
+ +Here's how things look like on device-dax after the sections are populated:: + + +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ + | | | 0 | -------------> | 0 | + | | +-----------+ +-----------+ + | | | 1 | -------------> | 1 | + | | +-----------+ +-----------+ + | | | 2 | ----------------^ ^ ^ ^ ^ ^ + | | +-----------+ | | | | | + | | | 3 | ------------------+ | | | | + | | +-----------+ | | | | + | | | 4 | --------------------+ | | | + | PMD | +-----------+ | | | + | level | | 5 | ----------------------+ | | + | mapping | +-----------+ | | + | | | 6 | ------------------------+ | + | | +-----------+ | + | | | 7 | --------------------------+ + | | +-----------+ + | | + | | + | | + +-----------+ + +With 1G PUD level mapping, we require 262144 struct pages and a single 4K +vmemmap page can contain 64 struct pages (4K/sizeof(struct page)). Hence we +require 4096 4K pages in vmemmap to map the struct pages for 1G PUD level +mapping. + +Here's how things look like on device-dax after the sections are populated:: + + +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ + | | | 0 | -------------> | 0 | + | | +-----------+ +-----------+ + | | | 1 | -------------> | 1 | + | | +-----------+ +-----------+ + | | | 2 | ----------------^ ^ ^ ^ ^ ^ + | | +-----------+ | | | | | + | | | 3 | ------------------+ | | | | + | | +-----------+ | | | | + | | | 4 | --------------------+ | | | + | PUD | +-----------+ | | | + | level | | . | ----------------------+ | | + | mapping | +-----------+ | | + | | | . | ------------------------+ | + | | +-----------+ | + | | | 4095 | --------------------------+ + | | +-----------+ + | | + | | + | | + +-----------+ diff --git a/Documentation/arch/riscv/acpi.rst b/Documentation/arch/riscv/acpi.rst new file mode 100644 index 0000000000..9870a28281 --- /dev/null +++ b/Documentation/arch/riscv/acpi.rst @@ -0,0 +1,10 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +============== +ACPI on RISC-V +============== + +The ISA string parsing rules for ACPI are defined by `Version ASCIIDOC +Conversion, 12/2022 of the RISC-V specifications, as defined by tag +"riscv-isa-release-1239329-2023-05-23" (commit 1239329 +) <https://github.com/riscv/riscv-isa-manual/releases/tag/riscv-isa-release-1239329-2023-05-23>`_ diff --git a/Documentation/arch/riscv/boot-image-header.rst b/Documentation/arch/riscv/boot-image-header.rst new file mode 100644 index 0000000000..df2ffc173e --- /dev/null +++ b/Documentation/arch/riscv/boot-image-header.rst @@ -0,0 +1,59 @@ +================================= +Boot image header in RISC-V Linux +================================= + +:Author: Atish Patra +:Date: 20 May 2019 + +This document only describes the boot image header details for RISC-V Linux. + +The following 64-byte header is present in decompressed Linux kernel image:: + + u32 code0; /* Executable code */ + u32 code1; /* Executable code */ + u64 text_offset; /* Image load offset, little endian */ + u64 image_size; /* Effective Image size, little endian */ + u64 flags; /* kernel flags, little endian */ + u32 version; /* Version of this header */ + u32 res1 = 0; /* Reserved */ + u64 res2 = 0; /* Reserved */ + u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ + u32 magic2 = 0x05435352; /* Magic number 2, little endian, "RSC\x05" */ + u32 res3; /* Reserved for PE COFF offset */ + +This header format is compliant with PE/COFF header and largely inspired from +ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common +header in future. + +Notes +===== + +- This header is also reused to support EFI stub for RISC-V. EFI specification + needs PE/COFF image header in the beginning of the kernel image in order to + load it as an EFI application. In order to support EFI stub, code0 is replaced + with "MZ" magic string and res3(at offset 0x3c) points to the rest of the + PE/COFF header.
+ +- The version field indicates the header version number + + ========== ============= + Bits 0:15 Minor version + Bits 16:31 Major version + ========== ============= + + This preserves compatibility across newer and older versions of the header. + The current version is defined as 0.2. + +- The "magic" field is deprecated as of version 0.2. In a future + release, it may be removed. This originally should have matched up + with the ARM64 header "magic" field, but unfortunately does not. + The "magic2" field replaces it, matching up with the ARM64 header. + +- In current header, the flags field has only one field. + + ===== ==================================== + Bit 0 Kernel endianness. 1 if BE, 0 if LE. + ===== ==================================== + +- Image size is mandatory for boot loader to load kernel image. Booting will + fail otherwise. diff --git a/Documentation/arch/riscv/boot.rst b/Documentation/arch/riscv/boot.rst new file mode 100644 index 0000000000..6077b587a8 --- /dev/null +++ b/Documentation/arch/riscv/boot.rst @@ -0,0 +1,169 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +RISC-V Kernel Boot Requirements and Constraints +=============================================== + +:Author: Alexandre Ghiti +:Date: 23 May 2023 + +This document describes what the RISC-V kernel expects from bootloaders and +firmware, and also the constraints that any developer must have in mind when +touching the early boot process. For the purposes of this document, the +``early boot process`` refers to any code that runs before the final virtual +mapping is set up. + +Pre-kernel Requirements and Constraints +======================================= + +The RISC-V kernel expects the following of bootloaders and platform firmware: + +Register state +-------------- + +The RISC-V kernel expects: + + * ``$a0`` to contain the hartid of the current core. + * ``$a1`` to contain the address of the devicetree in memory.
+ +CSR state +--------- + +The RISC-V kernel expects: + + * ``$satp = 0``: the MMU, if present, must be disabled. + +Reserved memory for resident firmware +------------------------------------- + +The RISC-V kernel must not map any resident memory, or memory protected with +PMPs, in the direct mapping, so the firmware must correctly mark those regions +as per the devicetree specification and/or the UEFI specification. + +Kernel location +--------------- + +The RISC-V kernel expects to be placed at a PMD boundary (2MB aligned for rv64 +and 4MB aligned for rv32). Note that the EFI stub will physically relocate the +kernel if that's not the case. + +Hardware description +-------------------- + +The firmware can pass either a devicetree or ACPI tables to the RISC-V kernel. + +The devicetree is either passed directly to the kernel from the previous stage +using the ``$a1`` register, or when booting with UEFI, it can be passed using the +EFI configuration table. + +The ACPI tables are passed to the kernel using the EFI configuration table. In +this case, a tiny devicetree is still created by the EFI stub. Please refer to +"EFI stub and devicetree" section below for details about this devicetree. + +Kernel entry +------------ + +On SMP systems, there are 2 methods to enter the kernel: + +- ``RISCV_BOOT_SPINWAIT``: the firmware releases all harts in the kernel, one hart + wins a lottery and executes the early boot code while the other harts are + parked waiting for the initialization to finish. This method is mostly used to + support older firmwares without SBI HSM extension and M-mode RISC-V kernel. +- ``Ordered booting``: the firmware releases only one hart that will execute the + initialization phase and then will start all other harts using the SBI HSM + extension. The ordered booting method is the preferred booting method for + booting the RISC-V kernel because it can support CPU hotplug and kexec. 
+ +UEFI +---- + +UEFI memory map +~~~~~~~~~~~~~~~ + +When booting with UEFI, the RISC-V kernel will use only the EFI memory map to +populate the system memory. + +The UEFI firmware must parse the subnodes of the ``/reserved-memory`` devicetree +node and abide by the devicetree specification to convert the attributes of +those subnodes (``no-map`` and ``reusable``) into their correct EFI equivalent +(refer to section "3.5.4 /reserved-memory and UEFI" of the devicetree +specification v0.4-rc1). + +RISCV_EFI_BOOT_PROTOCOL +~~~~~~~~~~~~~~~~~~~~~~~ + +When booting with UEFI, the EFI stub requires the boot hartid in order to pass +it to the RISC-V kernel in ``$a0``. The EFI stub retrieves the boot hartid using +one of the following methods: + +- ``RISCV_EFI_BOOT_PROTOCOL`` (**preferred**). +- ``boot-hartid`` devicetree subnode (**deprecated**). + +Any new firmware must implement ``RISCV_EFI_BOOT_PROTOCOL`` as the devicetree +based approach is deprecated now. + +Early Boot Requirements and Constraints +======================================= + +The RISC-V kernel's early boot process operates under the following constraints: + +EFI stub and devicetree +----------------------- + +When booting with UEFI, the devicetree is supplemented (or created) by the EFI +stub with the same parameters as arm64 which are described at the paragraph +"UEFI kernel support on ARM" in Documentation/arch/arm/uefi.rst. + +Virtual mapping installation +---------------------------- + +The installation of the virtual mapping is done in 2 steps in the RISC-V kernel: + +1. ``setup_vm()`` installs a temporary kernel mapping in ``early_pg_dir`` which + allows discovery of the system memory. Only the kernel text/data are mapped + at this point. When establishing this mapping, no allocation can be done + (since the system memory is not known yet), so ``early_pg_dir`` page table is + statically allocated (using only one table for each level). + +2.
``setup_vm_final()`` creates the final kernel mapping in ``swapper_pg_dir`` + and takes advantage of the discovered system memory to create the linear + mapping. When establishing this mapping, the kernel can allocate memory but + cannot access it directly (since the direct mapping is not present yet), so + it uses temporary mappings in the fixmap region to be able to access the + newly allocated page table levels. + +For ``virt_to_phys()`` and ``phys_to_virt()`` to be able to correctly convert +direct mapping addresses to physical addresses, they need to know the start of +the DRAM. This happens after step 1, right before step 2 installs the direct +mapping (see ``setup_bootmem()`` function in arch/riscv/mm/init.c). Any usage of +those macros before the final virtual mapping is installed must be carefully +examined. + +Devicetree mapping via fixmap +----------------------------- + +As the ``reserved_mem`` array is initialized with virtual addresses established +by ``setup_vm()``, and used with the mapping established by +``setup_vm_final()``, the RISC-V kernel uses the fixmap region to map the +devicetree. This ensures that the devicetree remains accessible by both virtual +mappings. + +Pre-MMU execution +----------------- + +A few pieces of code need to run before even the first virtual mapping is +established. These are the installation of the first virtual mapping itself, +patching of early alternatives and the early parsing of the kernel command line. +That code must be very carefully compiled as: + +- ``-fno-pie``: This is needed for relocatable kernels which use ``-fPIE``, + since otherwise, any access to a global symbol would go through the GOT which + is only relocated virtually. +- ``-mcmodel=medany``: Any access to a global symbol must be PC-relative to + avoid any relocations to happen before the MMU is setup. +- *all* instrumentation must also be disabled (that includes KASAN, ftrace and + others). 
+ +As using a symbol from a different compilation unit requires this unit to be +compiled with those flags, we advise, as much as possible, not to use external +symbols. diff --git a/Documentation/arch/riscv/features.rst b/Documentation/arch/riscv/features.rst new file mode 100644 index 0000000000..36e90144ad --- /dev/null +++ b/Documentation/arch/riscv/features.rst @@ -0,0 +1,3 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. kernel-feat:: features riscv diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst new file mode 100644 index 0000000000..7b2384de47 --- /dev/null +++ b/Documentation/arch/riscv/hwprobe.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 + +RISC-V Hardware Probing Interface +--------------------------------- + +The RISC-V hardware probing interface is based around a single syscall, which +is defined in <asm/hwprobe.h>:: + + struct riscv_hwprobe { + __s64 key; + __u64 value; + }; + + long sys_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, cpu_set_t *cpus, + unsigned int flags); + +The arguments are split into three groups: an array of key-value pairs, a CPU +set, and some flags. The key-value pairs are supplied with a count. Userspace +must prepopulate the key field for each element, and the kernel will fill in the +value if the key is recognized. If a key is unknown to the kernel, its key field +will be cleared to -1, and its value set to 0. The CPU set is defined by +CPU_SET(3). For value-like keys (e.g. vendor/arch/impl), the returned value will +only be valid if all CPUs in the given set have the same value. Otherwise -1 +will be returned. For boolean-like keys, the value returned will be a logical +AND of the values for the specified CPUs. Usermode can supply NULL for cpus and +0 for cpu_count as a shortcut for all online CPUs. There are currently no flags; +this value must be zero for future compatibility.
+ +On success 0 is returned, on failure a negative error code is returned. + +The following keys are defined: + +* :c:macro:`RISCV_HWPROBE_KEY_MVENDORID`: Contains the value of ``mvendorid``, + as defined by the RISC-V privileged architecture specification. + +* :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as + defined by the RISC-V privileged architecture specification. + +* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as + defined by the RISC-V privileged architecture specification. + +* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base + user-visible behavior that this kernel supports. The following base user ABIs + are defined: + + * :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: Support for rv32ima or + rv64ima, as defined by version 2.2 of the user ISA and version 1.10 of the + privileged ISA, with the following known exceptions (more exceptions may be + added, but only if it can be demonstrated that the user ABI is not broken): + + * The ``fence.i`` instruction cannot be directly executed by userspace + programs (it may still be executed in userspace via a + kernel-controlled mechanism such as the vDSO). + +* :c:macro:`RISCV_HWPROBE_KEY_IMA_EXT_0`: A bitmask containing the extensions + that are compatible with the :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: + base system behavior. + + * :c:macro:`RISCV_HWPROBE_IMA_FD`: The F and D extensions are supported, as + defined by commit cd20cee ("FMIN/FMAX now implement + minimumNumber/maximumNumber, not minNum/maxNum") of the RISC-V ISA manual. + + * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined + by version 2.2 of the RISC-V ISA manual. + + * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by + version 1.0 of the RISC-V Vector extension manual. 
+ + * :c:macro:`RISCV_HWPROBE_EXT_ZBA`: The Zba address generation extension is + supported, as defined in version 1.0 of the Bit-Manipulation ISA + extensions. + + * :c:macro:`RISCV_HWPROBE_EXT_ZBB`: The Zbb extension is supported, as defined + in version 1.0 of the Bit-Manipulation ISA extensions. + + * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined + in version 1.0 of the Bit-Manipulation ISA extensions. + + * :c:macro:`RISCV_HWPROBE_EXT_ZICBOZ`: The Zicboz extension is supported, as + ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. + +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance + information about the selected set of processors. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned + accesses is unknown. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are + emulated via software, either in or below the kernel. These accesses are + always extremely slow. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower + than equivalent byte accesses. Misaligned accesses may be supported + directly in hardware, or trapped and emulated by software. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster + than equivalent byte accesses. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are + not supported at all and will generate a misaligned address fault. + +* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which + represents the size of the Zicboz block in bytes. diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst new file mode 100644 index 0000000000..4dab0cb4b9 --- /dev/null +++ b/Documentation/arch/riscv/index.rst @@ -0,0 +1,24 @@ +=================== +RISC-V architecture +=================== + +.. 
toctree:: + :maxdepth: 1 + + acpi + boot + boot-image-header + vm-layout + hwprobe + patch-acceptance + uabi + vector + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/arch/riscv/patch-acceptance.rst b/Documentation/arch/riscv/patch-acceptance.rst new file mode 100644 index 0000000000..634aa222b4 --- /dev/null +++ b/Documentation/arch/riscv/patch-acceptance.rst @@ -0,0 +1,59 @@ +.. SPDX-License-Identifier: GPL-2.0 + +arch/riscv maintenance guidelines for developers +================================================ + +Overview +-------- +The RISC-V instruction set architecture is developed in the open: +in-progress drafts are available for all to review and to experiment +with implementations. New module or extension drafts can change +during the development process - sometimes in ways that are +incompatible with previous drafts. This flexibility can present a +challenge for RISC-V Linux maintenance. Linux maintainers disapprove +of churn, and the Linux development process prefers well-reviewed and +tested code over experimental code. We wish to extend these same +principles to the RISC-V-related code that will be accepted for +inclusion in the kernel. + +Patchwork +--------- + +RISC-V has a patchwork instance, where the status of patches can be checked: + + https://patchwork.kernel.org/project/linux-riscv/list/ + +If your patch does not appear in the default view, the RISC-V maintainers have +likely either requested changes, or expect it to be applied to another tree. + +Automation runs against this patchwork instance, building/testing patches as +they arrive. The automation applies patches against the current HEAD of the +RISC-V `for-next` and `fixes` branches, depending on whether the patch has been +detected as a fix. Failing those, it will use the RISC-V `master` branch. +The exact commit to which a series has been applied will be noted on patchwork. 
+Patches for which any of the checks fail are unlikely to be applied and in most +cases will need to be resubmitted. + +Submit Checklist Addendum +------------------------- +We'll only accept patches for new modules or extensions if the +specifications for those modules or extensions are listed as being +unlikely to be incompatibly changed in the future. For +specifications from the RISC-V foundation this means "Frozen" or +"Ratified", for the UEFI forum specifications this means a published +ECR. (Developers may, of course, maintain their own Linux kernel trees +that contain code for any draft extensions that they wish.) + +Additionally, the RISC-V specification allows implementers to create +their own custom extensions. These custom extensions aren't required +to go through any review or ratification process by the RISC-V +Foundation. To avoid the maintenance complexity and potential +performance impact of adding kernel code for implementor-specific +RISC-V extensions, we'll only consider patches for extensions that either: + +- Have been officially frozen or ratified by the RISC-V Foundation, or +- Have been implemented in hardware that is widely available, per standard + Linux practice. + +(Implementers may, of course, maintain their own Linux kernel trees containing +code for any custom extensions that they wish.) diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst new file mode 100644 index 0000000000..54d199dce7 --- /dev/null +++ b/Documentation/arch/riscv/uabi.rst @@ -0,0 +1,68 @@ +.. SPDX-License-Identifier: GPL-2.0 + +RISC-V Linux User ABI +===================== + +ISA string ordering in /proc/cpuinfo +------------------------------------ + +The canonical order of ISA extension names in the ISA string is defined in +chapter 27 of the unprivileged specification. +The specification uses vague wording, such as should, when it comes to ordering, +so for our purposes the following rules apply: + +#.
Single-letter extensions come first, in canonical order. + The canonical order is "IMAFDQLCBKJTPVH". + +#. All multi-letter extensions will be separated from other extensions by an + underscore. + +#. Additional standard extensions (starting with 'Z') will be sorted after + single-letter extensions and before any higher-privileged extensions. + +#. For additional standard extensions, the first letter following the 'Z' + conventionally indicates the most closely related alphabetical + extension category. If multiple 'Z' extensions are named, they will be + ordered first by category, in canonical order, as listed above, then + alphabetically within a category. + +#. Standard supervisor-level extensions (starting with 'S') will be listed + after standard unprivileged extensions. If multiple supervisor-level + extensions are listed, they will be ordered alphabetically. + +#. Standard machine-level extensions (starting with 'Zxm') will be listed + after any lower-privileged, standard extensions. If multiple machine-level + extensions are listed, they will be ordered alphabetically. + +#. Non-standard extensions (starting with 'X') will be listed after all standard + extensions. If multiple non-standard extensions are listed, they will be + ordered alphabetically. + +An example string following the order is:: + + rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux + +"isa" and "hart isa" lines in /proc/cpuinfo +------------------------------------------- + +The "isa" line in /proc/cpuinfo describes the lowest common denominator of +RISC-V ISA extensions recognized by the kernel and implemented on all harts. The +"hart isa" line, in contrast, describes the set of extensions recognized by the +kernel on the particular hart being described, even if those extensions may not +be present on all harts in the system. + +In both lines, the presence of an extension guarantees only that the hardware +has the described capability. 
Additional kernel support or policy changes may be +required before an extension's capability is fully usable by userspace programs. +Similarly, for S-mode extensions, presence in one of these lines does not +guarantee that the kernel is taking advantage of the extension, or that the +feature will be visible in guest VMs managed by this kernel. + +Inversely, the absence of an extension in these lines does not necessarily mean +the hardware does not support that feature. The running kernel may not recognize +the extension, or may have deliberately removed it from the listing. + +Misaligned accesses +------------------- + +Misaligned accesses are supported in userspace, but they may perform poorly. diff --git a/Documentation/arch/riscv/vector.rst b/Documentation/arch/riscv/vector.rst new file mode 100644 index 0000000000..75dd88a62e --- /dev/null +++ b/Documentation/arch/riscv/vector.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================= +Vector Extension Support for RISC-V Linux +========================================= + +This document briefly outlines the interface provided to userspace by Linux in +order to support the use of the RISC-V Vector Extension. + +1. prctl() Interface +--------------------- + +Two new prctl() calls are added to allow programs to manage the enablement +status for the use of Vector in userspace. The intended usage guideline for +these interfaces is to give init systems a way to modify the availability of V +for processes running under its domain. Calling these interfaces is not +recommended in library routines because libraries should not override policies +configured from the parent process. Also, users must note that these interfaces +are not portable to non-Linux, nor non-RISC-V environments, so it is discouraged +to use them in portable code. To get the availability of V in an ELF program, +please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the +auxiliary vector. 
+ +* prctl(PR_RISCV_V_SET_CONTROL, unsigned long arg) + + Sets the Vector enablement status of the calling thread, where the control + argument consists of two 2-bit enablement statuses and a bit for inheritance + mode. Other threads of the calling process are unaffected. + + Enablement status is a tri-state value each occupying 2-bit of space in + the control argument: + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_DEFAULT`: Use the system-wide default + enablement status on execve(). The system-wide default setting can be + controlled via sysctl interface (see sysctl section below). + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_ON`: Allow Vector to be run for the + thread. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_OFF`: Disallow Vector. Executing Vector + instructions under such condition will trap and cause the termination of the thread. + + arg: The control argument is a 5-bit value consisting of 3 parts, and + accessed by 3 masks respectively. + + The 3 masks, PR_RISCV_V_VSTATE_CTRL_CUR_MASK, + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK, and PR_RISCV_V_VSTATE_CTRL_INHERIT + represent bit[1:0], bit[3:2], and bit[4]. bit[1:0] accounts for the + enablement status of current thread, and the setting at bit[3:2] takes place + at next execve(). bit[4] defines the inheritance mode of the setting in + bit[3:2]. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_CUR_MASK`: bit[1:0]: Account for the + Vector enablement status for the calling thread. The calling thread is + not able to turn off Vector once it has been enabled. The prctl() call + fails with EPERM if the value in this mask is PR_RISCV_V_VSTATE_CTRL_OFF + but the current enablement status is not off. Setting + PR_RISCV_V_VSTATE_CTRL_DEFAULT here takes no effect but to set back + the original enablement status. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_NEXT_MASK`: bit[3:2]: Account for the + Vector enablement setting for the calling thread at the next execve() + system call. 
If PR_RISCV_V_VSTATE_CTRL_DEFAULT is used in this mask, + then the enablement status will be decided by the system-wide + enablement status when execve() happens. + + * :c:macro:`PR_RISCV_V_VSTATE_CTRL_INHERIT`: bit[4]: the inheritance + mode for the setting at PR_RISCV_V_VSTATE_CTRL_NEXT_MASK. If the bit + is set then the following execve() will not clear the setting in both + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK and PR_RISCV_V_VSTATE_CTRL_INHERIT. + This setting persists across changes in the system-wide default value. + + Return value: + * 0 on success; + * EINVAL: Vector not supported, invalid enablement status for current or + next mask; + * EPERM: Turning off Vector in PR_RISCV_V_VSTATE_CTRL_CUR_MASK if Vector + was enabled for the calling thread. + + On success: + * A valid setting for PR_RISCV_V_VSTATE_CTRL_CUR_MASK takes place + immediately. The enablement status specified in + PR_RISCV_V_VSTATE_CTRL_NEXT_MASK happens at the next execve() call, or + all following execve() calls if PR_RISCV_V_VSTATE_CTRL_INHERIT bit is + set. + * Every successful call overwrites a previous setting for the calling + thread. + +* prctl(PR_RISCV_V_GET_CONTROL) + + Gets the same Vector enablement status for the calling thread. Setting for + next execve() call and the inheritance bit are all OR-ed together. + + Note that ELF programs are able to get the availability of V for themselves by + reading :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the + auxiliary vector. + + Return value: + * a nonnegative value on success; + * EINVAL: Vector not supported. + +2. 
System runtime configuration (sysctl) +----------------------------------------- + +To mitigate the ABI impact of expansion of the signal stack, a +policy mechanism is provided to the administrators, distro maintainers, and +developers to control the default Vector enablement status for userspace +processes in the form of a sysctl knob: + +* /proc/sys/abi/riscv_v_default_allow + + Writing the text representation of 0 or 1 to this file sets the default + system enablement status for new starting userspace programs. Valid values + are: + + * 0: Do not allow Vector code to be executed as the default for new processes. + * 1: Allow Vector code to be executed as the default for new processes. + + Reading this file returns the current system default enablement status. + + At every execve() call, a new enablement status of the new process is set to + the system default, unless: + + * PR_RISCV_V_VSTATE_CTRL_INHERIT is set for the calling process, and the + setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not + PR_RISCV_V_VSTATE_CTRL_DEFAULT. Or, + + * The setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not + PR_RISCV_V_VSTATE_CTRL_DEFAULT. + + Modifying the system default enablement status does not affect the enablement + status of any existing process or thread that does not make an execve() call. + +3. Vector Register State Across System Calls +--------------------------------------------- + +As indicated by version 1.0 of the V extension [1], vector registers are +clobbered by system calls. + +1: https://github.com/riscv/riscv-v-spec/blob/master/calling-convention.adoc diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst new file mode 100644 index 0000000000..69ff6da1db --- /dev/null +++ b/Documentation/arch/riscv/vm-layout.rst @@ -0,0 +1,157 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +===================================== +Virtual Memory Layout on RISC-V Linux +===================================== + +:Author: Alexandre Ghiti +:Date: 12 February 2021 + +This document describes the virtual memory layout used by the RISC-V Linux +Kernel. + +RISC-V Linux Kernel 32bit +========================= + +RISC-V Linux Kernel SV32 +------------------------ + +TODO + +RISC-V Linux Kernel 64bit +========================= + +The RISC-V privileged architecture document states that the 64bit addresses +"must have bits 63–48 all equal to bit 47, or else a page-fault exception will +occur.": that splits the virtual address space into 2 halves separated by a very +big hole, the lower half is where the userspace resides, the upper half is where +the RISC-V Linux Kernel resides. + +RISC-V Linux Kernel SV39 +------------------------ + +:: + + ======================================================================================================================== + Start addr | Offset | End addr | Size | VM area description + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 0000003fffffffff | 256 GB | user-space virtual memory, different per mm + __________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical + | | | | virtual memory addresses up to the -256 GB + | | | | starting offset of kernel mappings. 
+ __________________|____________|__________________|_________|___________________________________________________________ + | + | Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | + ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space + ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory + fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ + + +RISC-V Linux Kernel SV48 +------------------------ + +:: + + ======================================================================================================================== + Start addr | Offset | End addr | Size | VM area description + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm + __________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... 
huge, almost 64 bits wide hole of non-canonical + | | | | virtual memory addresses up to the -128 TB + | | | | starting offset of kernel mappings. + __________________|____________|__________________|_________|___________________________________________________________ + | + | Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | + ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap + ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io + ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap + ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space + ffffaf8000000000 | -80.5 TB | ffffef7fffffffff | 64 TB | direct mapping of all physical memory + ffffef8000000000 | -16.5 TB | fffffffeffffffff | 16.5 TB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | Identical layout to the 39-bit one from here on: + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ + + +RISC-V Linux Kernel SV57 +------------------------ + +:: + + ======================================================================================================================== + Start addr | Offset | End addr | Size | VM area description + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm + 
__________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, almost 64 bits wide hole of non-canonical + | | | | virtual memory addresses up to the -64 PB + | | | | starting offset of kernel mappings. + __________________|____________|__________________|_________|___________________________________________________________ + | + | Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | + ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap + ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io + ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap + ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space + ff60000000000000 | -40 PB | ffdeffffffffffff | 32 PB | direct mapping of all physical memory + ffdf000000000000 | -8 PB | fffffffeffffffff | 8 PB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | Identical layout to the 39-bit one from here on: + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ + + +Userspace VAs +-------------------- +To maintain compatibility with software that relies on the VA space with a +maximum of 48 bits the kernel will, by default, return virtual addresses to +userspace from a 48-bit range (sv48). This default behavior is achieved by +passing 0 into the hint address parameter of mmap. 
On CPUs with an address space +smaller than sv48, the CPU maximum supported address space will be the default. + +Software can "opt-in" to receiving VAs from another VA space by providing +a hint address to mmap. A hint address passed to mmap will cause the largest +address space that fits entirely into the hint to be used, unless there is no +space left in the address space. If there is no space available in the requested +address space, an address in the next smallest available address space will be +returned. + +For example, in order to obtain 48-bit VA space, a hint address greater than +:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace +ending at :code:`1 << 47` and the addresses beyond this are reserved for the +kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater +than or equal to :code:`1 << 56` must be provided. diff --git a/Documentation/arch/sh/index.rst b/Documentation/arch/sh/index.rst index c64776738c..01fce7c131 100644 --- a/Documentation/arch/sh/index.rst +++ b/Documentation/arch/sh/index.rst @@ -43,12 +43,6 @@ mach-x3proto Busses ====== -SuperHyway ----------- - -.. kernel-doc:: drivers/sh/superhyway/superhyway.c - :export: - Maple ----- diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst index 934310ce72..07caa8fff8 100644 --- a/Documentation/arch/x86/amd-memory-encryption.rst +++ b/Documentation/arch/x86/amd-memory-encryption.rst @@ -130,4 +130,4 @@ SNP feature support. 
More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR -[1] https://www.amd.com/system/files/TechDocs/40332.pdf +[1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf diff --git a/Documentation/arch/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst index 440e4b645a..c92bfd5535 100644 --- a/Documentation/arch/x86/amd_hsmp.rst +++ b/Documentation/arch/x86/amd_hsmp.rst @@ -41,6 +41,24 @@ In-kernel integration: * Locking across callers is taken care by the driver. +HSMP sysfs interface +==================== + +1. Metrics table binary sysfs + +AMD MI300A MCM provides GET_METRICS_TABLE message to retrieve +most of the system management information from SMU in one go. + +The metrics table is made available as hexadecimal sysfs binary file +under per socket sysfs directory created at +/sys/devices/platform/amd_hsmp/socket%d/metrics_bin + +Note: lseek() is not supported as entire metrics table is read. + +Metrics table definitions will be documented as part of Public PPR. +The same is defined in the amd_hsmp.h header. + + An example ========== diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst index f5d2f2414d..22cc7a040d 100644 --- a/Documentation/arch/x86/boot.rst +++ b/Documentation/arch/x86/boot.rst @@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max. ============= ============================================================ -.. note:: + .. note:: The protocol version number should be changed only if the setup header is changed. There is no need to update the version number if boot_params or kernel_info are changed. 
Additionally, it is recommended to use diff --git a/Documentation/arch/x86/iommu.rst b/Documentation/arch/x86/iommu.rst index 42c7a6faa3..41fbadfe22 100644 --- a/Documentation/arch/x86/iommu.rst +++ b/Documentation/arch/x86/iommu.rst @@ -5,7 +5,7 @@ x86 IOMMU Support The architecture specs can be obtained from the below locations. - Intel: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf -- AMD: https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf +- AMD: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_3_07_PUB.pdf This guide gives a quick cheat sheet for some basic understanding. diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst index cb05d90111..a6279df64a 100644 --- a/Documentation/arch/x86/resctrl.rst +++ b/Documentation/arch/x86/resctrl.rst @@ -35,7 +35,7 @@ about the feature from resctrl's info directory. To use the feature mount the file system:: - # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps]] /sys/fs/resctrl + # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps][,debug]] /sys/fs/resctrl mount options are: @@ -46,6 +46,9 @@ mount options are: "mba_MBps": Enable the MBA Software Controller(mba_sc) to specify MBA bandwidth in MBps +"debug": + Make debug files accessible. Available debug files are annotated with + "Available only with debug option". L2 and L3 CDP are controlled separately. @@ -124,6 +127,13 @@ related to allocation: "P": Corresponding region is pseudo-locked. No sharing allowed. +"sparse_masks": + Indicates if non-contiguous 1s value in CBM is supported. + + "0": + Only contiguous 1s value in CBM is supported. + "1": + Non-contiguous 1s value in CBM is supported. Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: @@ -299,7 +309,14 @@ All groups contain the following files: "tasks": Reading this file shows the list of all tasks that belong to this group. 
Writing a task id to the file will add a task to the - group. If the group is a CTRL_MON group the task is removed from + group. Multiple tasks can be added by separating the task ids + with commas. Tasks will be assigned sequentially. Multiple + failures are not supported. A single failure encountered while + attempting to assign a task will cause the operation to abort and + already added tasks before the failure will remain in the group. + Failures will be logged to /sys/fs/resctrl/info/last_cmd_status. + + If the group is a CTRL_MON group the task is removed from whichever previous CTRL_MON group owned the task and also from any MON group that owned the task. If the group is a MON group, then the task must already belong to the CTRL_MON parent of this @@ -342,6 +359,10 @@ When control is enabled all CTRL_MON groups will also contain: file. On successful pseudo-locked region creation the mode will automatically change to "pseudo-locked". +"ctrl_hw_id": + Available only with debug option. The identifier used by hardware + for the control group. On x86 this is the CLOSID. + When monitoring is enabled all MON groups will also contain: "mon_data": @@ -355,6 +376,10 @@ When monitoring is enabled all MON groups will also contain: the sum for all tasks in the CTRL_MON group and all tasks in MON groups. Please see example section for more details on usage. +"mon_hw_id": + Available only with debug option. The identifier used by hardware + for the monitor group. On x86 this is the RMID. + Resource allocation rules ------------------------- @@ -445,12 +470,13 @@ For cache resources we describe the portion of the cache that is available for allocation using a bitmask. The maximum value of the mask is defined by each cpu model (and may be different for different cache levels). It is found using CPUID, but is also provided in the "info" directory of -the resctrl file system in "info/{resource}/cbm_mask". Intel hardware +the resctrl file system in "info/{resource}/cbm_mask". 
Some Intel hardware requires that these masks have all the '1' bits in a contiguous block. So 0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9 -and 0xA are not. On a system with a 20-bit mask each bit represents 5% -of the capacity of the cache. You could partition the cache into four -equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. +and 0xA are not. Check /sys/fs/resctrl/info/{resource}/sparse_masks +if non-contiguous 1s value is supported. On a system with a 20-bit mask +each bit represents 5% of the capacity of the cache. You could partition +the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. Memory bandwidth Allocation and monitoring ========================================== diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 7f58010ea8..08ebf9edbf 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -55,19 +55,19 @@ Package-related topology information in the kernel: The number of dies in a package. This information is retrieved via CPUID. - - cpuinfo_x86.cpu_die_id: + - cpuinfo_x86.topo.die_id: The physical ID of the die. This information is retrieved via CPUID. - - cpuinfo_x86.phys_proc_id: + - cpuinfo_x86.topo.pkg_id: The physical ID of the package. This information is retrieved via CPUID and deduced from the APIC IDs of the cores in the package. Modern systems use this value for the socket. There may be multiple - packages within a socket. This value may differ from cpu_die_id. + packages within a socket. This value may differ from topo.die_id. - - cpuinfo_x86.logical_proc_id: + - cpuinfo_x86.topo.logical_pkg_id: The logical ID of the package. As we do not trust BIOSes to enumerate the packages in a consistent way, we introduced the concept of logical package @@ -79,9 +79,7 @@ Package-related topology information in the kernel: The maximum possible number of packages in the system. 
Helpful for per package facilities to preallocate per package information. - - cpu_llc_id: - - A per-CPU variable containing: + - cpuinfo_x86.topo.llc_id: - On Intel, the first APIC ID of the list of CPUs sharing the Last Level Cache diff --git a/Documentation/block/blk-mq.rst b/Documentation/block/blk-mq.rst index 31f52f3269..fc06761b6e 100644 --- a/Documentation/block/blk-mq.rst +++ b/Documentation/block/blk-mq.rst @@ -56,7 +56,7 @@ sent to the software queue. Then, after the requests are processed by software queues, they will be placed at the hardware queue, a second stage queue where the hardware has direct access to process those requests. However, if the hardware does not have enough -resources to accept more requests, blk-mq will places requests on a temporary +resources to accept more requests, blk-mq will place requests on a temporary queue, to be sent in the future, when the hardware is able. Software staging queues diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst index f72b0de65a..a25c6d5df8 100644 --- a/Documentation/block/ioprio.rst +++ b/Documentation/block/ioprio.rst @@ -80,9 +80,6 @@ ionice.c tool:: #elif defined(__x86_64__) #define __NR_ioprio_set 251 #define __NR_ioprio_get 252 - #elif defined(__ia64__) - #define __NR_ioprio_set 1274 - #define __NR_ioprio_get 1275 #else #error "Unsupported arch" #endif diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 0d2647fb35..723408e399 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -37,16 +37,14 @@ prototype in a header for the wrapper kfunc. 
An example is given below:: /* Disables missing prototype warnings */ - __diag_push(); - __diag_ignore_all("-Wmissing-prototypes", - "Global kfuncs as their definitions will be in BTF"); + __bpf_kfunc_start_defs(); __bpf_kfunc struct task_struct *bpf_find_get_task_by_vpid(pid_t nr) { return find_get_task_by_vpid(nr); } - __diag_pop(); + __bpf_kfunc_end_defs(); A wrapper kfunc is often needed when we need to annotate parameters of the kfunc. Otherwise one may directly make the kfunc visible to the BPF program by diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst index ad4d4d5eec..63bb88846e 100644 --- a/Documentation/bpf/libbpf/program_types.rst +++ b/Documentation/bpf/libbpf/program_types.rst @@ -56,6 +56,16 @@ described in more detail in the footnotes. | | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | | + +----------------------------------------+----------------------------------+-----------+ | | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | | +-------------------------------------------+----------------------------------------+----------------------------------+-----------+ | ``BPF_PROG_TYPE_CGROUP_SOCK`` | 
``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | | + +----------------------------------------+----------------------------------+-----------+ diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index 4d86780ab0..f24270b8b0 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -113,7 +113,7 @@ Flags used by ``eth_get_headlen`` to estimate length of all headers for GRO. * ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to stop parsing as soon as it reaches IPv6 flow label; used by - ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash. + ``___skb_get_hash`` to get flow hash. * ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop parsing as soon as it reaches encapsulated headers; used by routing infrastructure. diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index c5d53a6e8c..245b6defc2 100644 --- a/Documentation/bpf/standardization/instruction-set.rst +++ b/Documentation/bpf/standardization/instruction-set.rst @@ -283,6 +283,14 @@ For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``, is first :term:`sign extended` from 32 to 64 bits, and then interpreted as a 64-bit signed value. +Note that there are varying definitions of the signed modulo operation +when the dividend or divisor are negative, where implementations often +vary by language such that Python, Ruby, etc. differ from C, Go, Java, +etc. This specification requires that signed modulo use truncated division +(where -13 % 3 == -1) as implemented in C, Go, etc.: + + a % n = a - n * trunc(a / n) + The ``BPF_MOVSX`` instruction does a move operation with sign extension. ``BPF_ALU | BPF_MOVSX`` :term:`sign extends` 8-bit and 16-bit operands into 32 bit operands, and zeroes the remaining upper 32 bits. 
diff --git a/Documentation/conf.py b/Documentation/conf.py index d4fdf6a387..dfc19c915d 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -383,6 +383,12 @@ latex_elements = { verbatimhintsturnover=false, ''', + # + # Some of our authors are fond of deep nesting; tell latex to + # cope. + # + 'maxlistdepth': '10', + # For CJK One-half spacing, need to be in front of hyperref 'extrapackages': r'\usepackage{setspace}', diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index 9511e405aa..dcb0e379e5 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -40,12 +40,6 @@ Command Line Switches supplied here is lower than the number of physically available CPUs, then those CPUs can not be brought online later. -``additional_cpus=n`` - Use this to limit hotpluggable CPUs. This option sets - ``cpu_possible_mask = cpu_present_mask + additional_cpus`` - - This option is limited to the IA64 architecture. - ``possible_cpus=n`` This option sets ``possible_cpus`` bits in ``cpu_possible_mask``. diff --git a/Documentation/core-api/debugging-via-ohci1394.rst b/Documentation/core-api/debugging-via-ohci1394.rst index 981ad4f89f..cb3d3228df 100644 --- a/Documentation/core-api/debugging-via-ohci1394.rst +++ b/Documentation/core-api/debugging-via-ohci1394.rst @@ -23,9 +23,9 @@ Retrieving a full system memory dump is also possible over the FireWire, using data transfer rates in the order of 10MB/s or more. With most FireWire controllers, memory access is limited to the low 4 GB -of physical address space. This can be a problem on IA64 machines where -memory is located mostly above that limit, but it is rarely a problem on -more common hardware such as x86, x86-64 and PowerPC. +of physical address space. This can be a problem on machines where memory is +located mostly above that limit, but it is rarely a problem on more common +hardware such as x86, x86-64 and PowerPC. 
At least LSI FW643e and FW643e2 controllers are known to support access to physical addresses above 4 GB, but this feature is currently not enabled by diff --git a/Documentation/core-api/maple_tree.rst b/Documentation/core-api/maple_tree.rst index 45defcf15d..96f3d5f076 100644 --- a/Documentation/core-api/maple_tree.rst +++ b/Documentation/core-api/maple_tree.rst @@ -175,7 +175,7 @@ will return the previous entry which occurs before the entry at index. mas_find() will find the first entry which exists at or above index on the first call, and the next entry from every subsequent calls. -mas_find_rev() will find the fist entry which exists at or below the last on +mas_find_rev() will find the first entry which exists at or below the last on the first call, and the previous entry from every subsequent calls. If the user needs to yield the lock during an operation, then the maple state diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst index 3506899ef8..9b7782f4f6 100644 --- a/Documentation/crypto/devel-algos.rst +++ b/Documentation/crypto/devel-algos.rst @@ -235,6 +235,4 @@ Specifics Of Asynchronous HASH Transformation Some of the drivers will want to use the Generic ScatterWalk in case the implementation needs to be fed separate chunks of the scatterlist which -contains the input data. The buffer containing the resulting hash will -always be properly aligned to .cra_alignmask so there is no need to -worry about this. +contains the input data. diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 382818a719..858c77fe7d 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -1,5 +1,8 @@ -The Kernel Address Sanitizer (KASAN) -==================================== +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023, Google LLC. 
+ +Kernel Address Sanitizer (KASAN) +================================ Overview -------- diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst index 3ae866dcc9..94b6802ab0 100644 --- a/Documentation/dev-tools/kcsan.rst +++ b/Documentation/dev-tools/kcsan.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2019, Google LLC. -The Kernel Concurrency Sanitizer (KCSAN) -======================================== +Kernel Concurrency Sanitizer (KCSAN) +==================================== The Kernel Concurrency Sanitizer (KCSAN) is a dynamic race detector, which relies on compile-time instrumentation, and uses a watchpoint-based sampling diff --git a/Documentation/dev-tools/kmsan.rst b/Documentation/dev-tools/kmsan.rst index 55fa82212e..323eedad53 100644 --- a/Documentation/dev-tools/kmsan.rst +++ b/Documentation/dev-tools/kmsan.rst @@ -1,9 +1,9 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2022, Google LLC. -=================================== -The Kernel Memory Sanitizer (KMSAN) -=================================== +=============================== +Kernel Memory Sanitizer (KMSAN) +=============================== KMSAN is a dynamic error detector aimed at finding uses of uninitialized values. It is based on compiler instrumentation, and is quite similar to the diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst index deede972f2..ab376b316c 100644 --- a/Documentation/dev-tools/kselftest.rst +++ b/Documentation/dev-tools/kselftest.rst @@ -112,7 +112,7 @@ You can specify multiple tests to skip:: You can also specify a restricted list of tests to run together with a dedicated skiplist:: - $ make TARGETS="bpf breakpoints size timers" SKIP_TARGETS=bpf kselftest + $ make TARGETS="breakpoints size timers" SKIP_TARGETS=size kselftest See the top-level tools/testing/selftests/Makefile for the list of all possible targets. 
@@ -165,7 +165,7 @@ To see the list of available tests, the `-l` option can be used:: The `-c` option can be used to run all the tests from a test collection, or the `-t` option for specific single tests. Either can be used multiple times:: - $ ./run_kselftest.sh -c bpf -c seccomp -t timers:posix_timers -t timer:nanosleep + $ ./run_kselftest.sh -c size -c seccomp -t timers:posix_timers -t timer:nanosleep For other features see the script usage output, seen with the `-h` option. @@ -210,7 +210,7 @@ option is supported, such as:: tests by using variables specified in `Running a subset of selftests`_ section:: - $ make -C tools/testing/selftests gen_tar TARGETS="bpf" FORMAT=.xz + $ make -C tools/testing/selftests gen_tar TARGETS="size" FORMAT=.xz .. _tar's auto-compress: https://www.gnu.org/software/tar/manual/html_node/gzip.html#auto_002dcompress diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index c27e1646ec..9db12e9166 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -651,12 +651,16 @@ For example: } Note that, for functions like device_unregister which only accept a single -pointer-sized argument, it's possible to directly cast that function to -a ``kunit_action_t`` rather than writing a wrapper function, for example: +pointer-sized argument, it's possible to automatically generate a wrapper +with the ``KUNIT_DEFINE_ACTION_WRAPPER()`` macro, for example: .. code-block:: C - kunit_add_action(test, (kunit_action_t *)&device_unregister, &dev); + KUNIT_DEFINE_ACTION_WRAPPER(device_unregister, device_unregister_wrapper, struct device *); + kunit_add_action(test, &device_unregister_wrapper, &dev); + +You should do this in preference to manually casting to the ``kunit_action_t`` type, +as casting function pointers will break Control Flow Integrity (CFI). ``kunit_add_action`` can fail if, for example, the system is out of memory. 
You can use ``kunit_add_action_or_reset`` instead which runs the action diff --git a/Documentation/dev-tools/ubsan.rst b/Documentation/dev-tools/ubsan.rst index 1be6618e23..2de7c63415 100644 --- a/Documentation/dev-tools/ubsan.rst +++ b/Documentation/dev-tools/ubsan.rst @@ -1,5 +1,7 @@ -The Undefined Behavior Sanitizer - UBSAN -======================================== +.. SPDX-License-Identifier: GPL-2.0 + +Undefined Behavior Sanitizer - UBSAN +==================================== UBSAN is a runtime undefined behaviour checker. diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 8b395893bd..3e886194b0 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -6,7 +6,7 @@ DT_MK_SCHEMA ?= dt-mk-schema DT_SCHEMA_LINT = $(shell which yamllint || \ echo "warning: python package 'yamllint' not installed, skipping" >&2) -DT_SCHEMA_MIN_VERSION = 2022.3 +DT_SCHEMA_MIN_VERSION = 2023.9 PHONY += check_dtschema_version check_dtschema_version: diff --git a/Documentation/devicetree/bindings/arm/amd,pensando.yaml b/Documentation/devicetree/bindings/arm/amd,pensando.yaml new file mode 100644 index 0000000000..e5c2591834 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/amd,pensando.yaml @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/amd,pensando.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: AMD Pensando SoC Platforms + +maintainers: + - Brad Larson + +properties: + $nodename: + const: "/" + compatible: + oneOf: + + - description: Boards with Pensando Elba SoC + items: + - enum: + - amd,pensando-elba-ortano + - const: amd,pensando-elba + +additionalProperties: true + +... 
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml index 1c1094cd6b..caab7ceeda 100644 --- a/Documentation/devicetree/bindings/arm/amlogic.yaml +++ b/Documentation/devicetree/bindings/arm/amlogic.yaml @@ -155,6 +155,7 @@ properties: - enum: - bananapi,bpi-m2s - khadas,vim3 + - libretech,aml-a311d-cc - radxa,zero2 - const: amlogic,a311d - const: amlogic,g12b @@ -196,6 +197,7 @@ properties: - hardkernel,odroid-hc4 - haochuangyi,h96-max - khadas,vim3l + - libretech,aml-s905d3-cc - seirobotics,sei610 - const: amlogic,sm1 @@ -203,6 +205,7 @@ properties: items: - enum: - amlogic,ad401 + - amlogic,ad402 - const: amlogic,a1 - description: Boards with the Amlogic C3 C302X/C308L SoC diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml index 6216cfb0a1..2d5545a2b4 100644 --- a/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml +++ b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml @@ -92,11 +92,8 @@ properties: maxItems: 1 cpu: - $ref: /schemas/types.yaml#/definitions/phandle description: - Handle to cpu this device is associated with. This must appear in the - base cti node if compatible string arm,coresight-cti-v8-arch is used, - or may appear in a trig-conns child node when appropriate. + Handle to cpu this CTI is associated with. power-domains: maxItems: 1 @@ -113,12 +110,12 @@ properties: description: defines a phandle reference to an associated CoreSight trace device. When the associated trace device is enabled, then the respective CTI - will be enabled. Use in a trig-conns node, or in CTI base node when - compatible string arm,coresight-cti-v8-arch used. If the associated - device has not been registered then the node name will be stored as - the connection name for later resolution. 
If the associated device is - not a CoreSight device or not registered then the node name will remain - the connection name and automatic enabling will not occur. + will be enabled. Use in CTI base node when compatible string + arm,coresight-cti-v8-arch used. If the associated device has not been + registered then the node name will be stored as the connection name for + later resolution. If the associated device is not a CoreSight device or + not registered then the node name will remain the connection name and + automatic enabling will not occur. # size cells and address cells required if trig-conns node present. "#size-cells": @@ -130,6 +127,8 @@ properties: patternProperties: '^trig-conns@([0-9]+)$': type: object + additionalProperties: false + description: A trigger connections child node which describes the trigger signals between this CTI and another hardware device. This device may be a CPU, @@ -141,6 +140,21 @@ patternProperties: reg: maxItems: 1 + cpu: + description: + Handle to cpu this trigger connection is associated with. + + arm,cs-dev-assoc: + $ref: /schemas/types.yaml#/definitions/phandle + description: + defines a phandle reference to an associated CoreSight trace device. + When the associated trace device is enabled, then the respective CTI + will be enabled. If the associated device has not been registered + then the node name will be stored as the connection name for later + resolution. If the associated device is not a CoreSight device or + not registered then the node name will remain the connection name + and automatic enabling will not occur. 
+ arm,trig-in-sigs: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 1 diff --git a/Documentation/devicetree/bindings/arm/arm,integrator.yaml b/Documentation/devicetree/bindings/arm/arm,integrator.yaml index 98ff5698ae..1bdbd1b7ee 100644 --- a/Documentation/devicetree/bindings/arm/arm,integrator.yaml +++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml @@ -40,45 +40,6 @@ properties: items: - const: arm,integrator-sp - core-module@10000000: - type: object - description: the root node in the Integrator platforms must contain - a core module child node. They are always at physical address - 0x10000000 in all the Integrator variants. - properties: - compatible: - items: - - const: arm,core-module-integrator - - const: syscon - - const: simple-mfd - reg: - maxItems: 1 - - required: - - compatible - - reg - -patternProperties: - "^syscon@[0-9a-f]+$": - description: All Integrator boards must provide a system controller as a - node in the root of the device tree. - type: object - properties: - compatible: - items: - - enum: - - arm,integrator-ap-syscon - - arm,integrator-cp-syscon - - arm,integrator-sp-syscon - - const: syscon - reg: - maxItems: 1 - - required: - - compatible - - reg - - required: - compatible - core-module@10000000 diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml index 8d3ed2e4ed..d1bdee98f9 100644 --- a/Documentation/devicetree/bindings/arm/arm,realview.yaml +++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml @@ -75,43 +75,6 @@ properties: type: object description: All RealView boards must provide a syscon system controller node inside the soc node. 
- properties: - compatible: - oneOf: - - items: - - const: arm,realview-eb11mp-revb-syscon - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-eb11mp-revc-syscon - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-eb-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-pb1176-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-pb11mp-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-pba8-syscon - - const: syscon - - const: simple-mfd - - items: - - const: arm,realview-pbx-syscon - - const: syscon - - const: simple-mfd - - required: - - compatible - - reg required: - compatible diff --git a/Documentation/devicetree/bindings/arm/arm,versatile.yaml b/Documentation/devicetree/bindings/arm/arm,versatile.yaml index 13e52ba920..7a3caf6af2 100644 --- a/Documentation/devicetree/bindings/arm/arm,versatile.yaml +++ b/Documentation/devicetree/bindings/arm/arm,versatile.yaml @@ -14,6 +14,14 @@ description: |+ with various pluggable interface boards, in essence the Versatile PB version is a superset of the Versatile AB version. + The root node in the Versatile platforms must contain a core module child + node. They are always at physical address 0x10000000 in all the Versatile + variants. + + When fitted with the IB2 Interface Board, the Versatile AB will present an + optional system controller node which controls the extra peripherals on the + interface board. + properties: $nodename: const: '/' @@ -32,38 +40,6 @@ properties: items: - const: arm,versatile-pb - core-module@10000000: - type: object - description: the root node in the Versatile platforms must contain - a core module child node. They are always at physical address - 0x10000000 in all the Versatile variants. 
- properties: - compatible: - items: - - const: arm,core-module-versatile - - const: syscon - - const: simple-mfd - reg: - maxItems: 1 - - required: - - compatible - - reg - -patternProperties: - "^syscon@[0-9a-f]+$": - type: object - description: When fitted with the IB2 Interface Board, the Versatile - AB will present an optional system controller node which controls the - extra peripherals on the interface board. - properties: - compatible: - contains: - const: arm,versatile-ib2-syscon - required: - - compatible - - reg - required: - compatible - core-module@10000000 diff --git a/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml index 68f717670f..749ee54a3f 100644 --- a/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml +++ b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml @@ -79,6 +79,7 @@ properties: - facebook,elbert-bmc - facebook,fuji-bmc - facebook,greatlakes-bmc + - facebook,minerva-cmc - facebook,yosemite4-bmc - ibm,everest-bmc - ibm,rainier-bmc diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.yaml b/Documentation/devicetree/bindings/arm/atmel-at91.yaml index dfb8fd0891..89d75fbb1d 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.yaml +++ b/Documentation/devicetree/bindings/arm/atmel-at91.yaml @@ -79,6 +79,13 @@ properties: - const: atmel,sama5d2 - const: atmel,sama5 + - description: Microchip SAMA5D29 Curiosity + items: + - const: microchip,sama5d29-curiosity + - const: atmel,sama5d29 + - const: atmel,sama5d2 + - const: atmel,sama5 + - items: - const: atmel,sama5d27 - const: atmel,sama5d2 diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 0d17cbad97..ffd526363f 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -190,6 +190,7 @@ properties: - qcom,kryo280 - qcom,kryo360 - qcom,kryo385 + - qcom,kryo465 - qcom,kryo468 - 
qcom,kryo485 - qcom,kryo560 @@ -308,7 +309,9 @@ properties: power-domains property. For PSCI based platforms, the name corresponding to the index of the PSCI - PM domain provider, must be "psci". + PM domain provider, must be "psci". For SCMI based platforms, the name + corresponding to the index of an SCMI performance domain provider, must be + "perf". qcom,saw: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 9450b2c8a6..32b195852a 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -25,8 +25,11 @@ properties: - description: i.MX23 based Boards items: - enum: + - creative,x-fi3 - fsl,imx23-evk + - fsl,stmp378x-devb - olimex,imx23-olinuxino + - sandisk,sansa_fuze_plus - const: fsl,imx23 - description: i.MX25 Product Development Kit @@ -385,6 +388,12 @@ properties: - const: toradex,apalis_imx6q - const: fsl,imx6q + - description: i.MX6Q Variscite VAR-SOM-MX6 Boards + items: + - const: variscite,mx6customboard + - const: variscite,var-som-imx6q + - const: fsl,imx6q + - description: TQ-Systems TQMa6Q SoM (variant A) on MBa6x items: - const: tq,imx6q-mba6x-a @@ -975,7 +984,9 @@ properties: - description: PHYTEC phyCORE-i.MX8MM SoM based boards items: - - const: phytec,imx8mm-phyboard-polis-rdk # phyBOARD-Polis RDK + - enum: + - phytec,imx8mm-phyboard-polis-rdk # phyBOARD-Polis RDK + - phytec,imx8mm-phygate-tauri-l # phyGATE-Tauri-L Gateway - const: phytec,imx8mm-phycore-som # phyCORE-i.MX8MM SoM - const: fsl,imx8mm @@ -1389,6 +1400,13 @@ properties: - fsl,ls1043a-qds - const: fsl,ls1043a + - description: TQ-Systems LS1043A based Boards + items: + - enum: + - tq,ls1043a-tqmls1043a-mbls10xxa + - const: tq,ls1043a-tqmls1043a + - const: fsl,ls1043a + - description: LS1046A based Boards items: - enum: @@ -1397,6 +1415,13 @@ properties: - fsl,ls1046a-rdb - const: fsl,ls1046a + - description: 
TQ-Systems LS1046A based Boards + items: + - enum: + - tq,ls1046a-tqmls1046a-mbls10xxa + - const: tq,ls1046a-tqmls1046a + - const: fsl,ls1046a + - description: LS1088A based Boards items: - enum: @@ -1404,6 +1429,13 @@ properties: - fsl,ls1088a-rdb - const: fsl,ls1088a + - description: TQ-Systems LS1088A based Boards + items: + - enum: + - tq,ls1088a-tqmls1088a-mbls10xxa + - const: tq,ls1088a-tqmls1088a + - const: fsl,ls1088a + - description: LS2080A based Boards items: - enum: @@ -1429,7 +1461,7 @@ properties: - fsl,lx2162a-qds - const: fsl,lx2160a - - description: SolidRun LX2160A based Boards + - description: SolidRun LX2160A CEX-7 based Boards items: - enum: - solidrun,clearfog-cx @@ -1437,6 +1469,13 @@ properties: - const: solidrun,lx2160a-cex7 - const: fsl,lx2160a + - description: SolidRun LX2162A SoM based Boards + items: + - enum: + - solidrun,lx2162a-clearfog + - const: solidrun,lx2162a-som + - const: fsl,lx2160a + - description: S32G2 based Boards items: - enum: diff --git a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml index 553dcbc70e..d60792b1d9 100644 --- a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml +++ b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml @@ -16,12 +16,28 @@ properties: oneOf: - items: - enum: + - adieng,coyote + - arcom,vulcan + - dlink,dsm-g600-a + - freecom,fsg-3 + - gateway,7001 + - gateworks,gw2348 + - goramo,multilink-router + - intel,ixdp425 + - intel,ixdpg425 + - iom,nas-100d - linksys,nslu2 + - netgear,wg302v1 + - netgear,wg302v2 + - usr,8200 - welltech,epbx100 + - linksys,wrv54g + - gemtek,gtwx5715 - const: intel,ixp42x - items: - enum: - gateworks,gw2358 + - intel,kixrp435 - const: intel,ixp43x additionalProperties: true diff --git a/Documentation/devicetree/bindings/arm/mediatek.yaml b/Documentation/devicetree/bindings/arm/mediatek.yaml index ae12b1cab9..a5999b3afc 100644 --- a/Documentation/devicetree/bindings/arm/mediatek.yaml +++ 
b/Documentation/devicetree/bindings/arm/mediatek.yaml @@ -133,11 +133,22 @@ properties: - enum: - mediatek,mt8183-evb - const: mediatek,mt8183 + - description: Google Hayato rev5 + items: + - const: google,hayato-rev5-sku2 + - const: google,hayato-sku2 + - const: google,hayato + - const: mediatek,mt8192 - description: Google Hayato items: - const: google,hayato-rev1 - const: google,hayato - const: mediatek,mt8192 + - description: Google Spherion rev4 (Acer Chromebook 514) + items: + - const: google,spherion-rev4 + - const: google,spherion + - const: mediatek,mt8192 - description: Google Spherion (Acer Chromebook 514) items: - const: google,spherion-rev3 @@ -248,6 +259,11 @@ properties: - enum: - mediatek,mt8365-evk - const: mediatek,mt8365 + - items: + - enum: + - mediatek,mt8395-evk + - const: mediatek,mt8395 + - const: mediatek,mt8195 - items: - enum: - mediatek,mt8516-pumpkin diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml index 28ded09d72..e7720caf31 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml @@ -22,6 +22,7 @@ properties: - mediatek,mt7622-wed - mediatek,mt7981-wed - mediatek,mt7986-wed + - mediatek,mt7988-wed - const: syscon reg: diff --git a/Documentation/devicetree/bindings/arm/psci.yaml b/Documentation/devicetree/bindings/arm/psci.yaml index 0c5381e081..cbb012e217 100644 --- a/Documentation/devicetree/bindings/arm/psci.yaml +++ b/Documentation/devicetree/bindings/arm/psci.yaml @@ -101,6 +101,7 @@ properties: patternProperties: "^power-domain-": $ref: /schemas/power/power-domain.yaml# + unevaluatedProperties: false type: object description: | diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml index 90f31beb80..8a6466d1fc 100644 --- 
a/Documentation/devicetree/bindings/arm/qcom.yaml +++ b/Documentation/devicetree/bindings/arm/qcom.yaml @@ -50,6 +50,7 @@ description: | msm8998 qcs404 qcm2290 + qcm6490 qdu1000 qrb2210 qrb4210 @@ -79,6 +80,7 @@ description: | sm6125 sm6350 sm6375 + sm7125 sm7225 sm8150 sm8250 @@ -189,6 +191,7 @@ properties: - items: - enum: + - longcheer,l9100 - samsung,a7 - sony,kanuti-tulip - square,apq8039-t2 @@ -391,6 +394,11 @@ properties: - const: qcom,qrb2210 - const: qcom,qcm2290 + - items: + - enum: + - fairphone,fp5 + - const: qcom,qcm6490 + - description: Qualcomm Technologies, Inc. Distributed Unit 1000 platform items: - enum: @@ -479,6 +487,11 @@ properties: - const: google,lazor-rev8 - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 (rev9) + items: + - const: google,lazor-rev9 + - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 (newest rev) items: - const: google,lazor @@ -500,6 +513,11 @@ properties: - const: google,lazor-rev8-sku2 - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 with KB Backlight (rev9) + items: + - const: google,lazor-rev9-sku2 + - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 with KB Backlight (newest rev) items: - const: google,lazor-sku2 @@ -521,9 +539,16 @@ properties: - const: google,lazor-rev8-sku0 - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 with LTE (rev9) + items: + - const: google,lazor-rev9-sku0 + - const: google,lazor-rev9-sku10 + - const: qcom,sc7180 + - description: Acer Chromebook Spin 513 with LTE (newest rev) items: - const: google,lazor-sku0 + - const: google,lazor-sku10 - const: qcom,sc7180 - description: Acer Chromebook 511 (rev4 - rev8) @@ -535,9 +560,16 @@ properties: - const: google,lazor-rev8-sku4 - const: qcom,sc7180 + - description: Acer Chromebook 511 (rev9) + items: + - const: google,lazor-rev9-sku4 + - const: google,lazor-rev9-sku15 + - const: qcom,sc7180 + - description: Acer Chromebook 511 (newest rev) items: - const: google,lazor-sku4 + - 
const: google,lazor-sku15 - const: qcom,sc7180 - description: Acer Chromebook 511 without Touchscreen (rev4) @@ -554,9 +586,16 @@ properties: - const: google,lazor-rev8-sku6 - const: qcom,sc7180 + - description: Acer Chromebook 511 without Touchscreen (rev9) + items: + - const: google,lazor-rev9-sku6 + - const: google,lazor-rev9-sku18 + - const: qcom,sc7180 + - description: Acer Chromebook 511 without Touchscreen (newest rev) items: - const: google,lazor-sku6 + - const: google,lazor-sku18 - const: qcom,sc7180 - description: Google Mrbland with AUO panel (rev0) @@ -943,6 +982,11 @@ properties: - sony,pdx225 - const: qcom,sm6375 + - items: + - enum: + - xiaomi,joyeuse + - const: qcom,sm7125 + - items: - enum: - fairphone,fp4 @@ -1086,6 +1130,7 @@ allOf: - qcom,sm6115 - qcom,sm6125 - qcom,sm6350 + - qcom,sm7125 - qcom,sm7225 - qcom,sm8150 - qcom,sm8250 diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml index ca53898628..5f7c6c4aad 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip.yaml @@ -660,6 +660,11 @@ properties: - pine64,quartz64-b - const: rockchip,rk3566 + - description: Pine64 QuartzPro64 + items: + - const: pine64,quartzpro64 + - const: rockchip,rk3588 + - description: Pine64 SoQuartz SoM items: - enum: @@ -669,6 +674,11 @@ properties: - const: pine64,soquartz - const: rockchip,rk3566 + - description: Powkiddy RGB30 + items: + - const: powkiddy,rgb30 + - const: rockchip,rk3566 + - description: Radxa Compute Module 3(CM3) items: - enum: @@ -870,6 +880,16 @@ properties: - const: tronsmart,orion-r68-meta - const: rockchip,rk3368 + - description: Turing RK1 + items: + - const: turing,rk1 + - const: rockchip,rk3588 + + - description: Xunlong Orange Pi 5 Plus + items: + - const: xunlong,orangepi-5-plus + - const: rockchip,rk3588 + - description: Xunlong Orange Pi R1 Plus / LTS items: - enum: @@ -877,6 +897,11 @@ properties: - 
xunlong,orangepi-r1-plus-lts - const: rockchip,rk3328 + - description: Xunlong Orange Pi 5 + items: + - const: xunlong,orangepi-5 + - const: rockchip,rk3588s + - description: Zkmagic A95X Z2 items: - const: zkmagic,a95x-z2 diff --git a/Documentation/devicetree/bindings/arm/sti.yaml b/Documentation/devicetree/bindings/arm/sti.yaml index 3ca054c643..842def3e3f 100644 --- a/Documentation/devicetree/bindings/arm/sti.yaml +++ b/Documentation/devicetree/bindings/arm/sti.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- $id: http://devicetree.org/schemas/arm/sti.yaml# @@ -13,13 +13,20 @@ properties: $nodename: const: '/' compatible: - items: - - enum: - - st,stih415 - - st,stih416 - - st,stih407 - - st,stih410 - - st,stih418 + oneOf: + - items: + - const: st,stih407-b2120 + - const: st,stih407 + - items: + - enum: + - st,stih410-b2120 + - st,stih410-b2260 + - const: st,stih410 + - items: + - enum: + - st,stih418-b2199 + - st,stih418-b2264 + - const: st,stih418 additionalProperties: true diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml index 4bf28e717a..df087c81c6 100644 --- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml @@ -146,6 +146,7 @@ properties: - lxa,stm32mp157c-mc1 # Linux Automation MC-1 - lxa,stm32mp157c-tac-gen1 # Linux Automation TAC (Generation 1) - lxa,stm32mp157c-tac-gen2 # Linux Automation TAC (Generation 2) + - oct,stm32mp157c-osd32-red # Octavo OSD32MP1 RED board - const: oct,stm32mp15xx-osd32 - enum: - st,stm32mp157 diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml index e4dd678f42..11c5ce941d 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml @@ -51,6 +51,11 @@ properties: - const: allwinner,parrot - 
const: allwinner,sun8i-a33 + - description: Anbernic RG-Nano + items: + - const: anbernic,rg-nano + - const: allwinner,sun8i-v3s + - description: Amarula A64 Relic items: - const: amarula,a64-relic @@ -151,6 +156,17 @@ properties: - const: roofull,beelink-x2 - const: allwinner,sun8i-h3 + - description: BigTreeTech Manta M4/8P + items: + - const: bigtreetech,cb1-manta + - const: bigtreetech,cb1 + - const: allwinner,sun50i-h616 + + - description: BigTreeTech Pi + items: + - const: bigtreetech,pi + - const: allwinner,sun50i-h616 + - description: Chuwi V7 CW0825 items: - const: chuwi,v7-cw0825 diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml deleted file mode 100644 index 89191cfdf6..0000000000 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml +++ /dev/null @@ -1,393 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra20-pmc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Tegra Power Management Controller (PMC) - -maintainers: - - Thierry Reding - - Jonathan Hunter - -properties: - compatible: - enum: - - nvidia,tegra20-pmc - - nvidia,tegra30-pmc - - nvidia,tegra114-pmc - - nvidia,tegra124-pmc - - nvidia,tegra210-pmc - - reg: - maxItems: 1 - description: - Offset and length of the register set for the device. - - clock-names: - items: - - const: pclk - - const: clk32k_in - description: - Must includes entries pclk and clk32k_in. - pclk is the Tegra clock of that name and clk32k_in is 32KHz clock - input to Tegra. - - clocks: - maxItems: 2 - description: - Must contain an entry for each entry in clock-names. - See ../clocks/clocks-bindings.txt for details. - - '#clock-cells': - const: 1 - description: - Tegra PMC has clk_out_1, clk_out_2, and clk_out_3. - PMC also has blink control which allows 32Khz clock output to - Tegra blink pad. 
- Consumer of PMC clock should specify the desired clock by having - the clock ID in its "clocks" phandle cell with pmc clock provider. - See include/dt-bindings/soc/tegra-pmc.h for the list of Tegra PMC - clock IDs. - - '#interrupt-cells': - const: 2 - description: - Specifies number of cells needed to encode an interrupt source. - The value must be 2. - - interrupt-controller: true - - nvidia,invert-interrupt: - $ref: /schemas/types.yaml#/definitions/flag - description: Inverts the PMU interrupt signal. - The PMU is an external Power Management Unit, whose interrupt output - signal is fed into the PMC. This signal is optionally inverted, and - then fed into the ARM GIC. The PMC is not involved in the detection - or handling of this interrupt signal, merely its inversion. - - nvidia,core-power-req-active-high: - $ref: /schemas/types.yaml#/definitions/flag - description: Core power request active-high. - - nvidia,sys-clock-req-active-high: - $ref: /schemas/types.yaml#/definitions/flag - description: System clock request active-high. - - nvidia,combined-power-req: - $ref: /schemas/types.yaml#/definitions/flag - description: combined power request for CPU and Core. - - nvidia,cpu-pwr-good-en: - $ref: /schemas/types.yaml#/definitions/flag - description: - CPU power good signal from external PMIC to PMC is enabled. - - nvidia,suspend-mode: - $ref: /schemas/types.yaml#/definitions/uint32 - enum: [0, 1, 2] - description: - The suspend mode that the platform should use. - Mode 0 is for LP0, CPU + Core voltage off and DRAM in self-refresh - Mode 1 is for LP1, CPU voltage off and DRAM in self-refresh - Mode 2 is for LP2, CPU voltage off - - nvidia,cpu-pwr-good-time: - $ref: /schemas/types.yaml#/definitions/uint32 - description: CPU power good time in uSec. - - nvidia,cpu-pwr-off-time: - $ref: /schemas/types.yaml#/definitions/uint32 - description: CPU power off time in uSec. 
- - nvidia,core-pwr-good-time: - $ref: /schemas/types.yaml#/definitions/uint32-array - description: - - Core power good time in uSec. - - nvidia,core-pwr-off-time: - $ref: /schemas/types.yaml#/definitions/uint32 - description: Core power off time in uSec. - - nvidia,lp0-vec: - $ref: /schemas/types.yaml#/definitions/uint32-array - description: - Starting address and length of LP0 vector. - The LP0 vector contains the warm boot code that is executed - by AVP when resuming from the LP0 state. - The AVP (Audio-Video Processor) is an ARM7 processor and - always being the first boot processor when chip is power on - or resume from deep sleep mode. When the system is resumed - from the deep sleep mode, the warm boot code will restore - some PLLs, clocks and then brings up CPU0 for resuming the - system. - - core-supply: - description: - Phandle to voltage regulator connected to the SoC Core power rail. - - core-domain: - type: object - description: | - The vast majority of hardware blocks of Tegra SoC belong to a - Core power domain, which has a dedicated voltage rail that powers - the blocks. - - properties: - operating-points-v2: - description: - Should contain level, voltages and opp-supported-hw property. - The supported-hw is a bitfield indicating SoC speedo or process - ID mask. - - "#power-domain-cells": - const: 0 - - required: - - operating-points-v2 - - "#power-domain-cells" - - additionalProperties: false - - i2c-thermtrip: - type: object - description: - On Tegra30, Tegra114 and Tegra124 if i2c-thermtrip subnode exists, - hardware-triggered thermal reset will be enabled. - - properties: - nvidia,i2c-controller-id: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - ID of I2C controller to send poweroff command to PMU. - Valid values are described in section 9.2.148 - "APBDEV_PMC_SCRATCH53_0" of the Tegra K1 Technical Reference - Manual. 
- - nvidia,bus-addr: - $ref: /schemas/types.yaml#/definitions/uint32 - description: Bus address of the PMU on the I2C bus. - - nvidia,reg-addr: - $ref: /schemas/types.yaml#/definitions/uint32 - description: PMU I2C register address to issue poweroff command. - - nvidia,reg-data: - $ref: /schemas/types.yaml#/definitions/uint32 - description: Poweroff command to write to PMU. - - nvidia,pinmux-id: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - Pinmux used by the hardware when issuing Poweroff command. - Defaults to 0. Valid values are described in section 12.5.2 - "Pinmux Support" of the Tegra4 Technical Reference Manual. - - required: - - nvidia,i2c-controller-id - - nvidia,bus-addr - - nvidia,reg-addr - - nvidia,reg-data - - additionalProperties: false - - powergates: - type: object - description: | - This node contains a hierarchy of power domain nodes, which should - match the powergates on the Tegra SoC. Each powergate node - represents a power-domain on the Tegra SoC that can be power-gated - by the Tegra PMC. - Hardware blocks belonging to a power domain should contain - "power-domains" property that is a phandle pointing to corresponding - powergate node. - The name of the powergate node should be one of the below. Note that - not every powergate is applicable to all Tegra devices and the following - list shows which powergates are applicable to which devices. - Please refer to Tegra TRM for mode details on the powergate nodes to - use for each power-gate block inside Tegra. 
- Name Description Devices Applicable - 3d 3D Graphics Tegra20/114/124/210 - 3d0 3D Graphics 0 Tegra30 - 3d1 3D Graphics 1 Tegra30 - aud Audio Tegra210 - dfd Debug Tegra210 - dis Display A Tegra114/124/210 - disb Display B Tegra114/124/210 - heg 2D Graphics Tegra30/114/124/210 - iram Internal RAM Tegra124/210 - mpe MPEG Encode All - nvdec NVIDIA Video Decode Engine Tegra210 - nvjpg NVIDIA JPEG Engine Tegra210 - pcie PCIE Tegra20/30/124/210 - sata SATA Tegra30/124/210 - sor Display interfaces Tegra124/210 - ve2 Video Encode Engine 2 Tegra210 - venc Video Encode Engine All - vdec Video Decode Engine Tegra20/30/114/124 - vic Video Imaging Compositor Tegra124/210 - xusba USB Partition A Tegra114/124/210 - xusbb USB Partition B Tegra114/124/210 - xusbc USB Partition C Tegra114/124/210 - - patternProperties: - "^[a-z0-9]+$": - type: object - additionalProperties: false - - properties: - clocks: - minItems: 1 - maxItems: 8 - description: - Must contain an entry for each clock required by the PMC - for controlling a power-gate. - See ../clocks/clock-bindings.txt document for more details. - - resets: - minItems: 1 - maxItems: 8 - description: - Must contain an entry for each reset required by the PMC - for controlling a power-gate. - See ../reset/reset.txt for more details. - - power-domains: - maxItems: 1 - - '#power-domain-cells': - const: 0 - description: Must be 0. - - required: - - clocks - - resets - - '#power-domain-cells' - - additionalProperties: false - -patternProperties: - "^[a-f0-9]+-[a-f0-9]+$": - type: object - description: - This is a Pad configuration node. On Tegra SOCs a pad is a set of - pins which are configured as a group. The pin grouping is a fixed - attribute of the hardware. The PMC can be used to set pad power state - and signaling voltage. A pad can be either in active or power down mode. - The support for power state and signaling voltage configuration varies - depending on the pad in question. 
3.3V and 1.8V signaling voltages - are supported on pins where software controllable signaling voltage - switching is available. - - The pad configuration state nodes are placed under the pmc node and they - are referred to by the pinctrl client properties. For more information - see Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt. - The pad name should be used as the value of the pins property in pin - configuration nodes. - - The following pads are present on Tegra124 and Tegra132 - audio, bb, cam, comp, csia, csb, cse, dsi, dsib, dsic, dsid, hdmi, hsic, - hv, lvds, mipi-bias, nand, pex-bias, pex-clk1, pex-clk2, pex-cntrl, - sdmmc1, sdmmc3, sdmmc4, sys_ddc, uart, usb0, usb1, usb2, usb_bias. - - The following pads are present on Tegra210 - audio, audio-hv, cam, csia, csib, csic, csid, csie, csif, dbg, - debug-nonao, dmic, dp, dsi, dsib, dsic, dsid, emmc, emmc2, gpio, hdmi, - hsic, lvds, mipi-bias, pex-bias, pex-clk1, pex-clk2, pex-cntrl, sdmmc1, - sdmmc3, spi, spi-hv, uart, usb0, usb1, usb2, usb3, usb-bias. - - properties: - pins: - $ref: /schemas/types.yaml#/definitions/string - description: Must contain name of the pad(s) to be configured. - - low-power-enable: - $ref: /schemas/types.yaml#/definitions/flag - description: Configure the pad into power down mode. - - low-power-disable: - $ref: /schemas/types.yaml#/definitions/flag - description: Configure the pad into active mode. - - power-source: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 or - TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages. - The values are defined in - include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h. - Power state can be configured on all Tegra124 and Tegra132 - pads. None of the Tegra124 or Tegra132 pads support signaling - voltage switching. - All of the listed Tegra210 pads except pex-cntrl support power - state configuration. Signaling voltage switching is supported - on below Tegra210 pads. 
- audio, audio-hv, cam, dbg, dmic, gpio, pex-cntrl, sdmmc1, - sdmmc3, spi, spi-hv, and uart. - - required: - - pins - - additionalProperties: false - -required: - - compatible - - reg - - clock-names - - clocks - - '#clock-cells' - -additionalProperties: false - -dependencies: - "nvidia,suspend-mode": ["nvidia,core-pwr-off-time", "nvidia,cpu-pwr-off-time"] - "nvidia,core-pwr-off-time": ["nvidia,core-pwr-good-time"] - "nvidia,cpu-pwr-off-time": ["nvidia,cpu-pwr-good-time"] - -examples: - - | - - #include - #include - #include - - tegra_pmc: pmc@7000e400 { - compatible = "nvidia,tegra210-pmc"; - reg = <0x7000e400 0x400>; - core-supply = <®ulator>; - clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; - clock-names = "pclk", "clk32k_in"; - #clock-cells = <1>; - - nvidia,invert-interrupt; - nvidia,suspend-mode = <0>; - nvidia,cpu-pwr-good-time = <0>; - nvidia,cpu-pwr-off-time = <0>; - nvidia,core-pwr-good-time = <4587 3876>; - nvidia,core-pwr-off-time = <39065>; - nvidia,core-power-req-active-high; - nvidia,sys-clock-req-active-high; - - pd_core: core-domain { - operating-points-v2 = <&core_opp_table>; - #power-domain-cells = <0>; - }; - - powergates { - pd_audio: aud { - clocks = <&tegra_car TEGRA210_CLK_APE>, - <&tegra_car TEGRA210_CLK_APB2APE>; - resets = <&tegra_car 198>; - power-domains = <&pd_core>; - #power-domain-cells = <0>; - }; - - pd_xusbss: xusba { - clocks = <&tegra_car TEGRA210_CLK_XUSB_SS>; - resets = <&tegra_car TEGRA210_CLK_XUSB_SS>; - power-domains = <&pd_core>; - #power-domain-cells = <0>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml b/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml index 3c7a2425f3..a17297cbef 100644 --- a/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/nvidia,tegra-ahci.yaml @@ -151,7 +151,7 @@ allOf: - interconnects - power-domains -additionalProperties: true +additionalProperties: false examples: - | diff --git 
a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml index 44892aa589..580f9a97dd 100644 --- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml +++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml @@ -20,6 +20,7 @@ description: | properties: compatible: enum: + - qcom,qdu1000-llcc - qcom,sc7180-llcc - qcom,sc7280-llcc - qcom,sc8180x-llcc @@ -44,6 +45,14 @@ properties: interrupts: maxItems: 1 + nvmem-cells: + items: + - description: Reference to an nvmem node for multi channel DDR + + nvmem-cell-names: + items: + - const: multi-chan-ddr + required: - compatible - reg @@ -92,6 +101,7 @@ allOf: compatible: contains: enum: + - qcom,qdu1000-llcc - qcom,sc8180x-llcc - qcom,sc8280xp-llcc then: diff --git a/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml new file mode 100644 index 0000000000..c229e4f0c1 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/amlogic,s4-peripherals-clkc.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Amlogic, Inc. 
All rights reserved +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/amlogic,s4-peripherals-clkc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic S4 Peripherals Clock Controller + +maintainers: + - Yu Tu + +properties: + compatible: + const: amlogic,s4-peripherals-clkc + + reg: + maxItems: 1 + + clocks: + minItems: 14 + items: + - description: input fixed pll div2 + - description: input fixed pll div2p5 + - description: input fixed pll div3 + - description: input fixed pll div4 + - description: input fixed pll div5 + - description: input fixed pll div7 + - description: input hifi pll + - description: input gp0 pll + - description: input mpll0 + - description: input mpll1 + - description: input mpll2 + - description: input mpll3 + - description: input hdmi pll + - description: input oscillator (usually at 24MHz) + - description: input external 32kHz reference (optional) + + clock-names: + minItems: 14 + items: + - const: fclk_div2 + - const: fclk_div2p5 + - const: fclk_div3 + - const: fclk_div4 + - const: fclk_div5 + - const: fclk_div7 + - const: hifi_pll + - const: gp0_pll + - const: mpll0 + - const: mpll1 + - const: mpll2 + - const: mpll3 + - const: hdmi_pll + - const: xtal + - const: ext_32k + + "#clock-cells": + const: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - "#clock-cells" + +additionalProperties: false + +examples: + - | + #include + + clkc_periphs: clock-controller@fe000000 { + compatible = "amlogic,s4-peripherals-clkc"; + reg = <0xfe000000 0x49c>; + clocks = <&clkc_pll 3>, + <&clkc_pll 13>, + <&clkc_pll 5>, + <&clkc_pll 7>, + <&clkc_pll 9>, + <&clkc_pll 11>, + <&clkc_pll 17>, + <&clkc_pll 15>, + <&clkc_pll 25>, + <&clkc_pll 27>, + <&clkc_pll 29>, + <&clkc_pll 31>, + <&clkc_pll 20>, + <&xtal>; + clock-names = "fclk_div2", "fclk_div2p5", "fclk_div3", "fclk_div4", + "fclk_div5", "fclk_div7", "hifi_pll", "gp0_pll", + "mpll0", "mpll1", "mpll2", "mpll3", "hdmi_pll", "xtal"; + #clock-cells = <1>; + 
}; +... diff --git a/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml new file mode 100644 index 0000000000..d8932ec26c --- /dev/null +++ b/Documentation/devicetree/bindings/clock/amlogic,s4-pll-clkc.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/amlogic,s4-pll-clkc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic S4 PLL Clock Controller + +maintainers: + - Yu Tu + +properties: + compatible: + const: amlogic,s4-pll-clkc + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: xtal + + "#clock-cells": + const: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - "#clock-cells" + +additionalProperties: false + +examples: + - | + clkc_pll: clock-controller@fe008000 { + compatible = "amlogic,s4-pll-clkc"; + reg = <0xfe008000 0x1e8>; + clocks = <&xtal>; + clock-names = "xtal"; + #clock-cells = <1>; + }; + +... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,hfpll.txt b/Documentation/devicetree/bindings/clock/qcom,hfpll.txt index ec02a02442..5769cbbe76 100644 --- a/Documentation/devicetree/bindings/clock/qcom,hfpll.txt +++ b/Documentation/devicetree/bindings/clock/qcom,hfpll.txt @@ -12,6 +12,9 @@ PROPERTIES "qcom,hfpll-apq8064", "qcom,hfpll" "qcom,hfpll-msm8974", "qcom,hfpll" "qcom,hfpll-msm8960", "qcom,hfpll" + "qcom,msm8976-hfpll-a53", "qcom,hfpll" + "qcom,msm8976-hfpll-a72", "qcom,hfpll" + "qcom,msm8976-hfpll-cci", "qcom,hfpll" - reg: Usage: required diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml index fd14d18309..4eb5e59f67 100644 --- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml @@ -28,6 +28,7 @@ properties: - qcom,sdx55-rpmh-clk - qcom,sdx65-rpmh-clk - qcom,sdx75-rpmh-clk + - qcom,sm4450-rpmh-clk - qcom,sm6350-rpmh-clk - qcom,sm8150-rpmh-clk - qcom,sm8250-rpmh-clk diff --git a/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml new file mode 100644 index 0000000000..5953c8d924 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sm4450-gcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller on SM4450 + +maintainers: + - Ajit Pandey + - Taniya Das + +description: | + Qualcomm global clock control module provides the clocks, resets and power + domains on SM4450 + + See also:: include/dt-bindings/clock/qcom,sm4450-gcc.h + +properties: + compatible: + const: qcom,sm4450-gcc + + clocks: + items: + - description: Board XO source + - description: Sleep clock source + - description: UFS Phy Rx symbol 0 clock source 
+ - description: UFS Phy Rx symbol 1 clock source + - description: UFS Phy Tx symbol 0 clock source + - description: USB3 Phy wrapper pipe clock source + +required: + - compatible + - clocks + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,sm4450-gcc"; + reg = <0x00100000 0x001f4200>; + clocks = <&rpmhcc RPMH_CXO_CLK>, <&sleep_clk>, + <&ufs_mem_phy 0>, <&ufs_mem_phy 1>, + <&ufs_mem_phy 2>, <&usb_1_qmpphy>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; + +... diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml index 8178c35bc3..dc3c18e4ea 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml @@ -13,11 +13,15 @@ description: | Qualcomm camera clock control module provides the clocks, resets and power domains on SM8450. 
- See also:: include/dt-bindings/clock/qcom,sm8450-camcc.h + See also:: + include/dt-bindings/clock/qcom,sm8450-camcc.h + include/dt-bindings/clock/qcom,sm8550-camcc.h properties: compatible: - const: qcom,sm8450-camcc + enum: + - qcom,sm8450-camcc + - qcom,sm8550-camcc clocks: items: diff --git a/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml b/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml index fe2fba18ae..80a8c7114c 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml @@ -27,6 +27,7 @@ properties: - renesas,r9a07g043-cpg # RZ/G2UL{Type-1,Type-2} and RZ/Five - renesas,r9a07g044-cpg # RZ/G2{L,LC} - renesas,r9a07g054-cpg # RZ/V2L + - renesas,r9a08g045-cpg # RZ/G3S - renesas,r9a09g011-cpg # RZ/V2M reg: diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index c1d225fcf2..56fc71d6a0 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -23,6 +23,7 @@ properties: - enum: - qcom,qcm2290-cpufreq-hw - qcom,sc7180-cpufreq-hw + - qcom,sdm670-cpufreq-hw - qcom,sdm845-cpufreq-hw - qcom,sm6115-cpufreq-hw - qcom,sm6350-cpufreq-hw @@ -36,11 +37,13 @@ properties: - qcom,sa8775p-cpufreq-epss - qcom,sc7280-cpufreq-epss - qcom,sc8280xp-cpufreq-epss + - qcom,sdx75-cpufreq-epss - qcom,sm6375-cpufreq-epss - qcom,sm8250-cpufreq-epss - qcom,sm8350-cpufreq-epss - qcom,sm8450-cpufreq-epss - qcom,sm8550-cpufreq-epss + - qcom,sm8650-cpufreq-epss - const: qcom,cpufreq-epss reg: @@ -128,6 +131,7 @@ allOf: - qcom,qdu1000-cpufreq-epss - qcom,sc7180-cpufreq-hw - qcom,sc8280xp-cpufreq-epss + - qcom,sdm670-cpufreq-hw - qcom,sdm845-cpufreq-hw - qcom,sm6115-cpufreq-hw - qcom,sm6350-cpufreq-hw diff --git a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml 
b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml index 7e1bb992ce..547265b8b1 100644 --- a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml +++ b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml @@ -27,8 +27,12 @@ select: enum: - qcom,apq8064 - qcom,apq8096 + - qcom,ipq5332 + - qcom,ipq6018 - qcom,ipq8064 - qcom,ipq8074 + - qcom,ipq9574 + - qcom,msm8909 - qcom,msm8939 - qcom,msm8960 - qcom,msm8974 @@ -43,7 +47,9 @@ patternProperties: - if: properties: compatible: - const: operating-points-v2-kryo-cpu + enum: + - operating-points-v2-krait-cpu + - operating-points-v2-kryo-cpu then: $ref: /schemas/opp/opp-v2-kryo-cpu.yaml# diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml index d531f3af3e..41df80bcdc 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml +++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/crypto/fsl-imx-sahara.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Freescale SAHARA Cryptographic Accelerator included in some i.MX chips +title: Freescale SAHARA Cryptographic Accelerator maintainers: - Steffen Trumtrar @@ -19,19 +19,56 @@ properties: maxItems: 1 interrupts: - maxItems: 1 + items: + - description: SAHARA Interrupt for Host 0 + - description: SAHARA Interrupt for Host 1 + minItems: 1 + + clocks: + items: + - description: Sahara IPG clock + - description: Sahara AHB clock + + clock-names: + items: + - const: ipg + - const: ahb required: - compatible - reg - interrupts + - clocks + - clock-names + +allOf: + - if: + properties: + compatible: + contains: + enum: + - fsl,imx53-sahara + then: + properties: + interrupts: + minItems: 2 + maxItems: 2 + else: + properties: + interrupts: + maxItems: 1 additionalProperties: false examples: - | + #include + crypto@10025000 { compatible = "fsl,imx27-sahara"; - reg = < 
0x10025000 0x800>; + reg = <0x10025000 0x800>; interrupts = <75>; + clocks = <&clks IMX27_CLK_SAHARA_IPG_GATE>, + <&clks IMX27_CLK_SAHARA_AHB_GATE>; + clock-names = "ipg", "ahb"; }; diff --git a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml index 7da9aa82d8..ca4f7d1cef 100644 --- a/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml +++ b/Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml @@ -13,6 +13,7 @@ properties: compatible: items: - enum: + - qcom,sa8775p-inline-crypto-engine - qcom,sm8450-inline-crypto-engine - qcom,sm8550-inline-crypto-engine - const: qcom,inline-crypto-engine diff --git a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml index bb42f4588b..13070db0f7 100644 --- a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml +++ b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml @@ -11,9 +11,17 @@ maintainers: properties: compatible: - enum: - - qcom,prng # 8916 etc. - - qcom,prng-ee # 8996 and later using EE + oneOf: + - enum: + - qcom,prng # 8916 etc. 
+ - qcom,prng-ee # 8996 and later using EE + - items: + - enum: + - qcom,sa8775p-trng + - qcom,sc7280-trng + - qcom,sm8450-trng + - qcom,sm8550-trng + - const: qcom,trng reg: maxItems: 1 @@ -28,8 +36,18 @@ properties: required: - compatible - reg - - clocks - - clock-names + +allOf: + - if: + not: + properties: + compatible: + contains: + const: qcom,trng + then: + required: + - clocks + - clock-names additionalProperties: false diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml b/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml new file mode 100644 index 0000000000..50d3fabe95 --- /dev/null +++ b/Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/devfreq/event/rockchip,dfi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip DFI + +maintainers: + - Sascha Hauer + +properties: + compatible: + enum: + - rockchip,rk3399-dfi + - rockchip,rk3568-dfi + - rockchip,rk3588-dfi + + clocks: + maxItems: 1 + + clock-names: + items: + - const: pclk_ddr_mon + + interrupts: + minItems: 1 + maxItems: 4 + + reg: + maxItems: 1 + + rockchip,pmu: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon managing the "PMU general register files". 
+ +required: + - compatible + - interrupts + - reg + +if: + properties: + compatible: + contains: + enum: + - rockchip,rk3399-dfi + +then: + required: + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + #include + + bus { + #address-cells = <2>; + #size-cells = <2>; + + dfi: dfi@ff630000 { + compatible = "rockchip,rk3399-dfi"; + reg = <0x00 0xff630000 0x00 0x4000>; + interrupts = ; + rockchip,pmu = <&pmugrf>; + clocks = <&cru PCLK_DDR_MON>; + clock-names = "pclk_ddr_mon"; + }; + }; diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt deleted file mode 100644 index 148191b0fc..0000000000 --- a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt +++ /dev/null @@ -1,18 +0,0 @@ - -* Rockchip rk3399 DFI device - -Required properties: -- compatible: Must be "rockchip,rk3399-dfi". -- reg: physical base address of each DFI and length of memory mapped region -- rockchip,pmu: phandle to the syscon managing the "pmu general register files" -- clocks: phandles for clock specified in "clock-names" property -- clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon"; - -Example: - dfi: dfi@ff630000 { - compatible = "rockchip,rk3399-dfi"; - reg = <0x00 0xff630000 0x00 0x4000>; - rockchip,pmu = <&pmugrf>; - clocks = <&cru PCLK_DDR_MON>; - clock-names = "pclk_ddr_mon"; - }; diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml index 987aa83c26..df20a3c9c7 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml @@ -9,6 +9,9 @@ title: Analog Devices ADV7533/35 HDMI Encoders maintainers: - Laurent Pinchart +allOf: + - $ref: /schemas/sound/dai-common.yaml# + description: | The ADV7533 and ADV7535 are HDMI audio and video transmitters compatible 
with HDMI 1.4 and DVI 1.0. They support color space @@ -89,6 +92,9 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 enum: [ 1, 2, 3, 4 ] + "#sound-dai-cells": + const: 0 + ports: description: The ADV7533/35 has two video ports and one audio port. diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml index 4a5e5d9d6f..4509c49673 100644 --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml @@ -17,6 +17,7 @@ properties: - analogix,anx7808 - analogix,anx7812 - analogix,anx7814 + - analogix,anx7816 - analogix,anx7818 reg: diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml new file mode 100644 index 0000000000..d6e51d0cf5 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/fsl,imx93-mipi-dsi.yaml @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/fsl,imx93-mipi-dsi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale i.MX93 specific extensions to Synopsys Designware MIPI DSI + +maintainers: + - Liu Ying + +description: | + There is a Synopsys Designware MIPI DSI Host Controller and a Synopsys + Designware MIPI DPHY embedded in Freescale i.MX93 SoC. Some configurations + and extensions to them are controlled by i.MX93 media blk-ctrl. 
+ +allOf: + - $ref: snps,dw-mipi-dsi.yaml# + +properties: + compatible: + const: fsl,imx93-mipi-dsi + + clocks: + items: + - description: apb clock + - description: pixel clock + - description: PHY configuration clock + - description: PHY reference clock + + clock-names: + items: + - const: pclk + - const: pix + - const: phy_cfg + - const: phy_ref + + interrupts: + maxItems: 1 + + fsl,media-blk-ctrl: + $ref: /schemas/types.yaml#/definitions/phandle + description: + i.MX93 media blk-ctrl, as a syscon, controls pixel component bit map + configurations from LCDIF display controller to the MIPI DSI host + controller and MIPI DPHY PLL related configurations through PLL SoC + interface. + + power-domains: + maxItems: 1 + +required: + - compatible + - interrupts + - fsl,media-blk-ctrl + - power-domains + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + #include + + dsi@4ae10000 { + compatible = "fsl,imx93-mipi-dsi"; + reg = <0x4ae10000 0x10000>; + interrupts = ; + clocks = <&clk IMX93_CLK_MIPI_DSI_GATE>, + <&clk IMX93_CLK_MEDIA_DISP_PIX>, + <&clk IMX93_CLK_MIPI_PHY_CFG>, + <&clk IMX93_CLK_24M>; + clock-names = "pclk", "pix", "phy_cfg", "phy_ref"; + fsl,media-blk-ctrl = <&media_blk_ctrl>; + power-domains = <&media_blk_ctrl IMX93_MEDIABLK_PD_MIPI_DSI>; + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "raydium,rm67191"; + reg = <0>; + reset-gpios = <&adp5585gpio 6 GPIO_ACTIVE_LOW>; + dsi-lanes = <4>; + video-mode = <2>; + + port { + panel_in: endpoint { + remote-endpoint = <&dsi_out>; + }; + }; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + dsi_to_lcdif: endpoint { + remote-endpoint = <&lcdif_to_dsi>; + }; + }; + + port@1 { + reg = <1>; + + dsi_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml index fc11ab5fc4..1c2be8d6f6 
100644 --- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml +++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml @@ -51,7 +51,10 @@ properties: minItems: 1 interrupts: - maxItems: 1 + items: + - description: LCDIF DMA interrupt + - description: LCDIF Error interrupt + minItems: 1 power-domains: maxItems: 1 @@ -131,6 +134,21 @@ allOf: then: required: - power-domains + - if: + properties: + compatible: + contains: + enum: + - fsl,imx23-lcdif + then: + properties: + interrupts: + minItems: 2 + maxItems: 2 + else: + properties: + interrupts: + maxItems: 1 examples: - | diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml index 1f8f2182e2..9cc1fd0751 100644 --- a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml +++ b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml @@ -50,10 +50,6 @@ examples: - | #include - backlight: backlight { - compatible = "gpio-backlight"; - gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; - }; spi { #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml b/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml new file mode 100644 index 0000000000..d68982fe2e --- /dev/null +++ b/Documentation/devicetree/bindings/display/lvds-data-mapping.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/lvds-data-mapping.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: LVDS Data Mapping + +maintainers: + - Laurent Pinchart + - Thierry Reding + +description: | + LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple + incompatible data link layers have been used over time to transmit image data + to LVDS devices. This bindings supports devices compatible with the following + specifications. 
+ + [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February + 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA) + [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National + Semiconductor + [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video + Electronics Standards Association (VESA) + + Device compatible with those specifications have been marketed under the + FPD-Link and FlatLink brands. + +properties: + data-mapping: + enum: + - jeida-18 + - jeida-24 + - vesa-24 + description: | + The color signals mapping order. + + LVDS data mappings are defined as follows. + + - "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and + [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. + + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< + DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< + + - "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI] + specifications. Data are transferred as follows on 4 LVDS lanes. + + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< + DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< + DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< + + - "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. + Data are transferred as follows on 4 LVDS lanes. 
+ + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< + DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< + DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< + + Control signals are mapped as follows. + + CTL0: HSync + CTL1: VSync + CTL2: Data Enable + CTL3: 0 + +additionalProperties: true + +... diff --git a/Documentation/devicetree/bindings/display/lvds.yaml b/Documentation/devicetree/bindings/display/lvds.yaml index 7cd2ce7e9c..224db49320 100644 --- a/Documentation/devicetree/bindings/display/lvds.yaml +++ b/Documentation/devicetree/bindings/display/lvds.yaml @@ -6,83 +6,24 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LVDS Display Common Properties +allOf: + - $ref: lvds-data-mapping.yaml# + maintainers: - Laurent Pinchart - Thierry Reding -description: |+ - LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple - incompatible data link layers have been used over time to transmit image data - to LVDS devices. This bindings supports devices compatible with the following - specifications. - - [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February - 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA) - [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National - Semiconductor - [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video - Electronics Standards Association (VESA) - - Device compatible with those specifications have been marketed under the - FPD-Link and FlatLink brands. +description: + This binding extends the data mapping defined in lvds-data-mapping.yaml. 
+ It supports reversing the bit order on the formats defined there in order + to accomodate for even more specialized data formats, since a variety of + data formats and layouts is used to drive LVDS displays. properties: - data-mapping: - enum: - - jeida-18 - - jeida-24 - - vesa-24 - description: | - The color signals mapping order. - - LVDS data mappings are defined as follows. - - - "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and - [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. - - Slot 0 1 2 3 4 5 6 - ________________ _________________ - Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ - DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< - DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< - DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< - - - "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI] - specifications. Data are transferred as follows on 4 LVDS lanes. - - Slot 0 1 2 3 4 5 6 - ________________ _________________ - Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ - DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< - DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< - DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< - DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< - - - "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. - Data are transferred as follows on 4 LVDS lanes. 
- - Slot 0 1 2 3 4 5 6 - ________________ _________________ - Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ - DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< - DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< - DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< - DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< - - Control signals are mapped as follows. - - CTL0: HSync - CTL1: VSync - CTL2: Data Enable - CTL3: 0 - data-mirror: type: boolean description: - If set, reverse the bit order described in the data mappings below on all + If set, reverse the bit order described in the data mappings on all data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6. additionalProperties: true diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml index ff781f2174..2aef1eb32e 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml @@ -21,6 +21,8 @@ description: | properties: compatible: enum: + - mediatek,mt8188-dp-tx + - mediatek,mt8188-edp-tx - mediatek,mt8195-dp-tx - mediatek,mt8195-edp-tx diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml index 12441b9376..ed24b61709 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml @@ -10,7 +10,6 @@ maintainers: - Chun-Kuang Hu - Philipp Zabel - Jitao Shi - - Xinlei Lee description: | The MediaTek DSI function block is a sink of the display subsystem and can @@ -30,6 +29,7 @@ properties: - mediatek,mt8173-dsi - mediatek,mt8183-dsi - mediatek,mt8186-dsi + - mediatek,mt8188-dsi - items: - enum: - mediatek,mt6795-dsi diff --git 
a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index f12558960c..dbe398f84f 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -114,6 +114,7 @@ properties: port@1: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Output endpoint of the controller properties: endpoint: diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml index d65926b4f0..4e1c25b429 100644 --- a/Documentation/devicetree/bindings/display/msm/gmu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml @@ -21,7 +21,7 @@ properties: compatible: oneOf: - items: - - pattern: '^qcom,adreno-gmu-6[0-9][0-9]\.[0-9]$' + - pattern: '^qcom,adreno-gmu-[67][0-9][0-9]\.[0-9]$' - const: qcom,adreno-gmu - const: qcom,adreno-gmu-wrapper @@ -64,6 +64,10 @@ properties: iommus: maxItems: 1 + qcom,qmp: + $ref: /schemas/types.yaml#/definitions/phandle + description: Reference to the AOSS side-channel message RAM + operating-points-v2: true opp-table: @@ -213,6 +217,47 @@ allOf: - const: axi - const: memnoc + - if: + properties: + compatible: + contains: + enum: + - qcom,adreno-gmu-730.1 + - qcom,adreno-gmu-740.1 + then: + properties: + reg: + items: + - description: Core GMU registers + - description: Resource controller registers + - description: GMU PDC registers + reg-names: + items: + - const: gmu + - const: rscc + - const: gmu_pdc + clocks: + items: + - description: GPU AHB clock + - description: GMU clock + - description: GPU CX clock + - description: GPU AXI clock + - description: GPU MEMNOC clock + - description: GMU HUB clock + - description: GPUSS DEMET clock + clock-names: + items: + - const: ahb + - const: gmu + - const: cxo + - const: axi + - const: memnoc + - const: hub + - const: demet + + required: + - 
qcom,qmp + - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml index 56b9b247e8..b019db9547 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml @@ -23,7 +23,7 @@ properties: The driver is parsing the compat string for Adreno to figure out the gpu-id and patch level. items: - - pattern: '^qcom,adreno-[3-6][0-9][0-9]\.[0-9]$' + - pattern: '^qcom,adreno-[3-7][0-9][0-9]\.[0-9]$' - const: qcom,adreno - description: | The driver is parsing the compat string for Imageon to @@ -203,7 +203,7 @@ allOf: properties: compatible: contains: - pattern: '^qcom,adreno-6[0-9][0-9]\.[0-9]$' + pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]$' then: # Starting with A6xx, the clocks are usually defined in the GMU node properties: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,msm8998-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,msm8998-mdss.yaml index e320ab1de6..2d9edab5a3 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,msm8998-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,msm8998-mdss.yaml @@ -38,12 +38,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,msm8998-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -52,6 +56,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-10nm-8998 diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index 4184b84d4c..5ad155612b 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml @@ 
-44,18 +44,24 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,qcm2290-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-ctrl-6g-qcm2290 "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-14nm-2290 diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml index 3b9c103e50..3432a2407c 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7180-mdss.yaml @@ -44,18 +44,24 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc7180-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc7180-dp "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -64,6 +70,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-10nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml index 43500dad66..bbb727831f 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml @@ -44,18 +44,24 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc7280-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc7280-dp "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + 
properties: compatible: items: @@ -64,12 +70,16 @@ patternProperties: "^edp@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc7280-edp "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: enum: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-mdss.yaml index db680fb12b..af79406e16 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-mdss.yaml @@ -34,12 +34,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sc8280xp-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: enum: diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-mdss.yaml index d6d7ac1b2e..6e8b69e5ec 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sdm845-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sdm845-mdss.yaml @@ -42,18 +42,24 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sdm845-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sdm845-dp "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -62,6 +68,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-10nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml index 17221b62a6..dde5c2acea 100644 --- 
a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml @@ -32,12 +32,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6115-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: oneOf: @@ -50,6 +54,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-14nm-2290 diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml index 57f0e36477..671c2c2aa8 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6125-mdss.yaml @@ -43,12 +43,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6125-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -57,6 +61,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6125-dsi-phy-14nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml index db255b1f4c..e1dcb45376 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml @@ -43,12 +43,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6350-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -57,6 +61,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: 
object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-10nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 30d36fffae..b15c3950f0 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -43,12 +43,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6375-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -57,6 +61,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm6375-dsi-phy-7nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml index 54cdaa827c..a2a8be7f64 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml @@ -47,12 +47,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8150-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -61,6 +65,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-7nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml index e887f031b8..994975909f 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8250-mdss.yaml @@ -46,12 +46,16 @@ properties: patternProperties: 
"^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8250-dpu "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -60,6 +64,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,dsi-phy-7nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml index 60d4aae113..163fc83c1e 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml @@ -48,18 +48,24 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8350-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8350-dp "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -68,6 +74,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8350-dsi-phy-5nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml index bb22940b93..001b26e653 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8450-mdss.yaml @@ -38,12 +38,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8450-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -52,6 +56,8 @@ patternProperties: "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: 
compatible: items: @@ -60,6 +66,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8450-dsi-phy-5nm diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml index 48aea8005c..1ea50a2c7c 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml @@ -38,12 +38,16 @@ properties: patternProperties: "^display-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8550-dpu "^displayport-controller@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -52,6 +56,8 @@ patternProperties: "^dsi@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: items: @@ -60,6 +66,8 @@ patternProperties: "^phy@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: const: qcom,sm8550-dsi-phy-4nm diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml index 90e323e19e..3cabbba865 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml @@ -48,10 +48,6 @@ examples: - | #include - backlight: backlight { - compatible = "gpio-backlight"; - gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; - }; spi { #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml b/Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml new file mode 100644 index 0000000000..2f4d27a309 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: 
http://devicetree.org/schemas/display/panel/jdi,lpm102a188a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: JDI LPM102A188A 2560x1800 10.2" DSI Panel + +maintainers: + - Diogo Ivo + +description: | + This panel requires a dual-channel DSI host to operate. It supports two modes: + - left-right: each channel drives the left or right half of the screen + - even-odd: each channel drives the even or odd lines of the screen + + Each of the DSI channels controls a separate DSI peripheral. The peripheral + driven by the first link (DSI-LINK1) is considered the primary peripheral + and controls the device. The 'link2' property contains a phandle to the + peripheral driven by the second link (DSI-LINK2). + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: jdi,lpm102a188a + + reg: true + enable-gpios: true + reset-gpios: true + power-supply: true + backlight: true + + ddi-supply: + description: The regulator that provides IOVCC (1.8V). + + link2: + $ref: /schemas/types.yaml#/definitions/phandle + description: | + phandle to the DSI peripheral on the secondary link. Note that the + presence of this property marks the containing node as DSI-LINK1. 
+ +required: + - compatible + - reg + +if: + required: + - link2 +then: + required: + - power-supply + - ddi-supply + - enable-gpios + - reset-gpios + +additionalProperties: false + +examples: + - | + #include + #include + + dsia: dsi@54300000 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x54300000 0x0 0x00040000>; + + link2: panel@0 { + compatible = "jdi,lpm102a188a"; + reg = <0>; + }; + }; + + dsib: dsi@54400000{ + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x54400000 0x0 0x00040000>; + nvidia,ganged-mode = <&dsia>; + + link1: panel@0 { + compatible = "jdi,lpm102a188a"; + reg = <0>; + power-supply = <&pplcd_vdd>; + ddi-supply = <&pp1800_lcdio>; + enable-gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>; + link2 = <&link2>; + backlight = <&backlight>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml index 3f6efbb942..a40ab887ad 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml @@ -17,6 +17,7 @@ properties: enum: - leadtek,ltk050h3146w - leadtek,ltk050h3146w-a2 + - leadtek,ltk050h3148w reg: true backlight: true reset-gpios: true diff --git a/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml b/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml index 116c1b6030..cce775a87f 100644 --- a/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml +++ b/Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml @@ -7,9 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: NewVision NV3051D based LCD panel description: | - The NewVision NV3051D is a driver chip used to drive DSI panels. 
For now, - this driver only supports the 640x480 panels found in the Anbernic RG353 - based devices. + The NewVision NV3051D is a driver chip used to drive DSI panels. maintainers: - Chris Morgan @@ -21,6 +19,7 @@ properties: compatible: items: - enum: + - anbernic,rg351v-panel - anbernic,rg353p-panel - anbernic,rg353v-panel - const: newvision,nv3051d diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml index 73674baea7..f9160d7bac 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml @@ -42,6 +42,8 @@ properties: - lg,acx467akm-7 # LG Corporation 7" WXGA TFT LCD panel - lg,ld070wx3-sl01 + # LG Corporation 5" HD TFT LCD panel + - lg,lh500wx1-sd03 # One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel - osddisplays,osd101t2587-53ts # Panasonic 10" WUXGA TFT LCD panel diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml new file mode 100644 index 0000000000..a5a596ff8e --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml @@ -0,0 +1,118 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/panel-simple-lvds-dual-ports.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Simple LVDS panels with one power supply and dual LVDS ports + +maintainers: + - Liu Ying + - Thierry Reding + - Sam Ravnborg + +description: | + This binding file is a collection of the LVDS panels that + has dual LVDS ports and requires only a single power-supply. + The first port receives odd pixels, and the second port receives even pixels. + There are optionally a backlight and an enable GPIO. 
+ The panel may use an OF graph binding for the association to the display, + or it may be a direct child node of the display. + + If the panel is more advanced a dedicated binding file is required. + +allOf: + - $ref: panel-common.yaml# + +properties: + + compatible: + enum: + # compatible must be listed in alphabetical order, ordered by compatible. + # The description in the comment is mandatory for each compatible. + + # AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel + - auo,g133han01 + # AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel + - auo,g185han01 + # AU Optronics Corporation 19.0" (1280x1024) TFT LCD panel + - auo,g190ean01 + # Kaohsiung Opto-Electronics Inc. 10.1" WUXGA (1920 x 1200) LVDS TFT LCD panel + - koe,tx26d202vm0bwa + # NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel + - nlt,nl192108ac18-02d + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false + description: The first sink port. + + properties: + dual-lvds-odd-pixels: + type: boolean + description: The first sink port for odd pixels. + + required: + - dual-lvds-odd-pixels + + port@1: + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false + description: The second sink port. + + properties: + dual-lvds-even-pixels: + type: boolean + description: The second sink port for even pixels. 
+ + required: + - dual-lvds-even-pixels + + required: + - port@0 + - port@1 + + backlight: true + enable-gpios: true + power-supply: true + +additionalProperties: false + +required: + - compatible + - ports + - power-supply + +examples: + - | + panel: panel-lvds { + compatible = "koe,tx26d202vm0bwa"; + power-supply = <&vdd_lcd_reg>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + dual-lvds-odd-pixels; + reg = <0>; + + panel_lvds0_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + dual-lvds-even-pixels; + reg = <1>; + + panel_lvds1_in: endpoint { + remote-endpoint = <&lvds1_out>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 25b4589d4a..11422af347 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -21,9 +21,9 @@ description: | allOf: - $ref: panel-common.yaml# + - $ref: ../lvds-data-mapping.yaml# properties: - compatible: enum: # compatible must be listed in alphabetical order, ordered by compatible. @@ -65,14 +65,8 @@ properties: - auo,g104sn02 # AU Optronics Corporation 12.1" (1280x800) TFT LCD panel - auo,g121ean01 - # AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel - - auo,g133han01 # AU Optronics Corporation 15.6" (1366x768) TFT LCD panel - auo,g156xtn01 - # AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel - - auo,g185han01 - # AU Optronics Corporation 19.0" (1280x1024) TFT LCD panel - - auo,g190ean01 # AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel - auo,p320hvn03 # AU Optronics Corporation 21.5" FHD (1920x1080) color TFT LCD panel @@ -204,8 +198,6 @@ properties: - kingdisplay,kd116n21-30nv-a010 # Kaohsiung Opto-Electronics Inc. 5.7" QVGA (320 x 240) TFT LCD panel - koe,tx14d24vm1bpa - # Kaohsiung Opto-Electronics Inc. 
10.1" WUXGA (1920 x 1200) LVDS TFT LCD panel - - koe,tx26d202vm0bwa # Kaohsiung Opto-Electronics. TX31D200VM0BAA 12.3" HSXGA LVDS panel - koe,tx31d200vm0baa # Kyocera Corporation 7" WVGA (800x480) transmissive color TFT @@ -216,8 +208,6 @@ properties: - lemaker,bl035-rgb-002 # LG 7" (800x480 pixels) TFT LCD panel - lg,lb070wv8 - # LG Corporation 5" HD TFT LCD panel - - lg,lh500wx1-sd03 # LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel - lg,lp079qx1-sp0v # LG 9.7" (2048x1536 pixels) TFT LCD panel @@ -238,6 +228,8 @@ properties: - logictechno,lttd800480070-l6wh-rt # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel - mitsubishi,aa070mc01-ca1 + # Mitsubishi AA084XE01 8.4" XGA TFT LCD panel + - mitsubishi,aa084xe01 # Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module - multi-inno,mi0700s4t-6 # Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module @@ -254,8 +246,6 @@ properties: - neweast,wjfh116008a # Newhaven Display International 480 x 272 TFT LCD panel - newhaven,nhd-4.3-480272ef-atxl - # NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel - - nlt,nl192108ac18-02d # New Vision Display 7.0" 800 RGB x 480 TFT LCD panel - nvd,9128 # OKAYA Electric America, Inc. 
RS800480T-7X0GP 7" WVGA LCD panel @@ -357,6 +347,17 @@ properties: power-supply: true no-hpd: true hpd-gpios: true + data-mapping: true + +if: + not: + properties: + compatible: + contains: + const: innolux,g101ice-l01 +then: + properties: + data-mapping: false additionalProperties: false @@ -376,3 +377,16 @@ examples: }; }; }; + - | + panel_lvds: panel-lvds { + compatible = "innolux,g101ice-l01"; + power-supply = <&vcc_lcd_reg>; + + data-mapping = "jeida-24"; + + port { + panel_in_lvds: endpoint { + remote-endpoint = <&ltdc_out_lvds>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml b/Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml new file mode 100644 index 0000000000..f436ba6738 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/raydium,rm692e5.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Raydium RM692E5 based DSI display panels + +maintainers: + - Konrad Dybcio + +description: + The Raydium RM692E5 is a generic DSI Panel IC used to control + AMOLED panels.
+ +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + items: + - const: fairphone,fp5-rm692e5-boe + - const: raydium,rm692e5 + + dvdd-supply: + description: Digital voltage rail + + vci-supply: + description: Analog voltage rail + + vddio-supply: + description: I/O voltage rail + + reg: true + port: true + +required: + - compatible + - reg + - reset-gpios + - dvdd-supply + - vci-supply + - vddio-supply + - port + +unevaluatedProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "fairphone,fp5-rm692e5-boe", "raydium,rm692e5"; + reg = <0>; + + reset-gpios = <&tlmm 44 GPIO_ACTIVE_LOW>; + dvdd-supply = <&vreg_oled_vci>; + vci-supply = <&vreg_l12c>; + vddio-supply = <&vreg_oled_dvdd>; + + port { + panel_in_0: endpoint { + remote-endpoint = <&dsi0_out>; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml b/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml index 5ea74426b1..97cccd8a84 100644 --- a/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml +++ b/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml @@ -22,6 +22,8 @@ properties: enum: # Anberic RG353V-V2 5.0" 640x480 TFT LCD panel - anbernic,rg353v-panel-v2 + # Powkiddy RGB30 3.0" 720x720 TFT LCD panel + - powkiddy,rgb30-panel # Rocktech JH057N00900 5.5" 720x1440 TFT LCD panel - rocktech,jh057n00900 # Xingbangda XBD599 5.99" 720x1440 TFT LCD panel diff --git a/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml new file mode 100644 index 0000000000..9816c4cacc --- /dev/null +++ b/Documentation/devicetree/bindings/display/renesas,shmobile-lcdc.yaml @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/renesas,shmobile-lcdc.yaml# 
+$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas SH-Mobile LCD Controller (LCDC) + +maintainers: + - Laurent Pinchart + - Geert Uytterhoeven + +properties: + compatible: + enum: + - renesas,r8a7740-lcdc # R-Mobile A1 + - renesas,sh73a0-lcdc # SH-Mobile AG5 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 5 + description: + Only the functional clock is mandatory. + Some of the optional clocks are model-dependent (e.g. "video" (a.k.a. + "vou" or "dv_clk") is available on R-Mobile A1 only). + + clock-names: + minItems: 1 + items: + - const: fck + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + - enum: [ media, lclk, hdmi, video ] + + power-domains: + maxItems: 1 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: LCD port (R-Mobile A1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: HDMI port (R-Mobile A1 LCDC1 and SH-Mobile AG5) + unevaluatedProperties: false + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: MIPI-DSI port (SH-Mobile AG5) + unevaluatedProperties: false + + required: + - port@0 + + unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - ports + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + const: renesas,r8a7740-lcdc + then: + properties: + ports: + properties: + port@2: false + + - if: + properties: + compatible: + contains: + const: renesas,sh73a0-lcdc + then: + properties: + ports: + required: + - port@1 + - port@2 + +examples: + - | + #include + #include + + lcd-controller@fe940000 { + compatible = "renesas,r8a7740-lcdc"; + reg = <0xfe940000 0x4000>; + interrupts = ; + clocks = <&mstp1_clks R8A7740_CLK_LCDC0>, + 
<&cpg_clocks R8A7740_CLK_M3>, <&lcdlclk0_clk>, + <&vou_clk>; + clock-names = "fck", "media", "lclk", "video"; + power-domains = <&pd_a4lc>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lcdc0_rgb: endpoint { + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml index 8e8a408791..ccf79e738f 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml @@ -18,6 +18,7 @@ properties: - rockchip,rk3288-mipi-dsi - rockchip,rk3399-mipi-dsi - rockchip,rk3568-mipi-dsi + - rockchip,rv1126-mipi-dsi - const: snps,dw-mipi-dsi interrupts: @@ -77,6 +78,7 @@ allOf: enum: - rockchip,px30-mipi-dsi - rockchip,rk3568-mipi-dsi + - rockchip,rv1126-mipi-dsi then: properties: diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml index df61cb5f5c..b339b7e708 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml @@ -31,6 +31,7 @@ properties: - rockchip,rk3368-vop - rockchip,rk3399-vop-big - rockchip,rk3399-vop-lit + - rockchip,rv1126-vop reg: minItems: 1 diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 621f271484..3b0ebc0db8 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -54,11 +54,6 @@ examples: - | #include - backlight: backlight { - compatible = "gpio-backlight"; - gpios = <&gpio 44 GPIO_ACTIVE_HIGH>; - }; - spi { #address-cells = <1>; #size-cells = <0>; diff --git 
a/Documentation/devicetree/bindings/display/solomon,ssd-common.yaml b/Documentation/devicetree/bindings/display/solomon,ssd-common.yaml new file mode 100644 index 0000000000..3e6998481a --- /dev/null +++ b/Documentation/devicetree/bindings/display/solomon,ssd-common.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/solomon,ssd-common.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Common properties for Solomon OLED Display Controllers + +maintainers: + - Javier Martinez Canillas + +properties: + reg: + maxItems: 1 + + reset-gpios: + maxItems: 1 + + # Only required for SPI + dc-gpios: + description: + GPIO connected to the controller's D/C# (Data/Command) pin, + that is needed for 4-wire SPI to tell the controller if the + data sent is for a command register or the display data RAM + maxItems: 1 + + solomon,height: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Height in pixel of the screen driven by the controller. + The default value is controller-dependent. + + solomon,width: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Width in pixel of the screen driven by the controller. + The default value is controller-dependent. 
+ +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml index 20e2bd15d4..3afbb52d1b 100644 --- a/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml +++ b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml @@ -27,38 +27,12 @@ properties: - solomon,ssd1307 - solomon,ssd1309 - reg: - maxItems: 1 - pwms: maxItems: 1 - reset-gpios: - maxItems: 1 - - # Only required for SPI - dc-gpios: - description: - GPIO connected to the controller's D/C# (Data/Command) pin, - that is needed for 4-wire SPI to tell the controller if the - data sent is for a command register or the display data RAM - maxItems: 1 - vbat-supply: description: The supply for VBAT - solomon,height: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - Height in pixel of the screen driven by the controller. - The default value is controller-dependent. - - solomon,width: - $ref: /schemas/types.yaml#/definitions/uint32 - description: - Width in pixel of the screen driven by the controller. - The default value is controller-dependent. 
- solomon,page-offset: $ref: /schemas/types.yaml#/definitions/uint32 default: 1 @@ -148,7 +122,7 @@ required: - reg allOf: - - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: solomon,ssd-common.yaml# - if: properties: diff --git a/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml new file mode 100644 index 0000000000..37975ee61c --- /dev/null +++ b/Documentation/devicetree/bindings/display/solomon,ssd132x.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/solomon,ssd132x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Solomon SSD132x OLED Display Controllers + +maintainers: + - Javier Martinez Canillas + +properties: + compatible: + enum: + - solomon,ssd1322 + - solomon,ssd1325 + - solomon,ssd1327 + +required: + - compatible + - reg + +allOf: + - $ref: solomon,ssd-common.yaml# + + - if: + properties: + compatible: + contains: + const: solomon,ssd1322 + then: + properties: + width: + default: 480 + height: + default: 128 + + - if: + properties: + compatible: + contains: + const: solomon,ssd1325 + then: + properties: + width: + default: 128 + height: + default: 80 + + - if: + properties: + compatible: + contains: + const: solomon,ssd1327 + then: + properties: + width: + default: 128 + height: + default: 128 + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + oled@3c { + compatible = "solomon,ssd1327"; + reg = <0x3c>; + reset-gpios = <&gpio2 7>; + }; + + }; + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + oled@0 { + compatible = "solomon,ssd1327"; + reg = <0x0>; + reset-gpios = <&gpio2 7>; + dc-gpios = <&gpio2 8>; + spi-max-frequency = <10000000>; + }; + }; diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index f61145c91b..88d0de3d1b 
100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -69,6 +69,8 @@ properties: dma-channel-mask: maxItems: 1 + dma-coherent: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 98139489d4..b6864d0ee8 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: /schemas/nvmem/nvmem.yaml + - $ref: /schemas/nvmem/nvmem-deprecated-cells.yaml select: properties: @@ -67,10 +68,14 @@ properties: pattern: cs16$ - items: pattern: c32$ + - items: + pattern: c32d-wl$ - items: pattern: cs32$ - items: pattern: c64$ + - items: + pattern: c64d-wl$ - items: pattern: cs64$ - items: diff --git a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml index b138f3d23d..4591523b51 100644 --- a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml +++ b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml @@ -38,6 +38,9 @@ properties: with shmem address(4KB-page, offset) as parameters items: - const: arm,scmi-smc-param + - description: SCMI compliant firmware with Qualcomm SMC/HVC transport + items: + - const: qcom,scmi-smc - description: SCMI compliant firmware with SCMI Virtio transport. The virtio transport only supports a single device. 
items: @@ -149,8 +152,15 @@ properties: '#clock-cells': const: 1 - required: - - '#clock-cells' + '#power-domain-cells': + const: 1 + + oneOf: + - required: + - '#clock-cells' + + - required: + - '#power-domain-cells' protocol@14: $ref: '#/$defs/protocol-node' @@ -306,6 +316,7 @@ else: enum: - arm,scmi-smc - arm,scmi-smc-param + - qcom,scmi-smc then: required: - arm,smc-id diff --git a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml index 4233ea839b..0613a37a85 100644 --- a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml +++ b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml @@ -24,6 +24,7 @@ properties: - qcom,scm-apq8064 - qcom,scm-apq8084 - qcom,scm-ipq4019 + - qcom,scm-ipq5018 - qcom,scm-ipq5332 - qcom,scm-ipq6018 - qcom,scm-ipq806x @@ -56,6 +57,7 @@ properties: - qcom,scm-sm6125 - qcom,scm-sm6350 - qcom,scm-sm6375 + - qcom,scm-sm7150 - qcom,scm-sm8150 - qcom,scm-sm8250 - qcom,scm-sm8350 @@ -89,6 +91,14 @@ properties: protocol to handle sleeping SCM calls. maxItems: 1 + qcom,sdi-enabled: + description: + Indicates that the SDI (Secure Debug Image) has been enabled by TZ + by default and it needs to be disabled. + If not disabled WDT assertion or reboot will cause the board to hang + in the debug mode. 
+ type: boolean + qcom,dload-mode: $ref: /schemas/types.yaml#/definitions/phandle-array items: diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml index d0ca2af89f..918776d16e 100644 --- a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml @@ -18,9 +18,17 @@ properties: - fsl,imx31-gpio - fsl,imx35-gpio - fsl,imx7d-gpio + - items: + - enum: + - fsl,imx27-gpio + - const: fsl,imx21-gpio - items: - const: fsl,imx35-gpio - const: fsl,imx31-gpio + - items: + - enum: + - fsl,imx25-gpio + - const: fsl,imx35-gpio - items: - enum: - fsl,imx50-gpio diff --git a/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml b/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml index 7c2d152e86..a27f929502 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-vf610.yaml @@ -20,6 +20,7 @@ description: | properties: compatible: oneOf: + - const: fsl,imx8ulp-gpio - const: fsl,vf610-gpio - items: - const: fsl,imx7ulp-gpio @@ -27,16 +28,18 @@ properties: - items: - enum: - fsl,imx93-gpio - - fsl,imx8ulp-gpio - - const: fsl,imx7ulp-gpio + - fsl,imx95-gpio + - const: fsl,imx8ulp-gpio reg: - description: The first reg tuple represents the PORT module, the second tuple - represents the GPIO module. 
+ minItems: 1 maxItems: 2 interrupts: - maxItems: 1 + items: + - description: GPIO Trustzone non-secure interrupt number + - description: GPIO Trustzone secure interrupt number + minItems: 1 interrupt-controller: true @@ -59,7 +62,8 @@ properties: - const: port gpio-ranges: - maxItems: 1 + minItems: 1 + maxItems: 4 patternProperties: "^.+-hog(-[0-9]+)?$": @@ -77,6 +81,30 @@ required: - "#gpio-cells" - gpio-controller +allOf: + - if: + properties: + compatible: + contains: + enum: + - fsl,vf610-gpio + - fsl,imx7ulp-gpio + then: + properties: + interrupts: + maxItems: 1 + reg: + items: + - description: PORT register base address + - description: GPIO register base address + else: + properties: + interrupts: + minItems: 2 + reg: + items: + - description: GPIO register base address + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt b/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt deleted file mode 100644 index 8dc41ed996..0000000000 --- a/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt +++ /dev/null @@ -1,38 +0,0 @@ -Intel IXP4xx XScale Networking Processors GPIO - -This GPIO controller is found in the Intel IXP4xx processors. -It supports 16 GPIO lines. - -The interrupt portions of the GPIO controller is hierarchical: -the synchronous edge detector is part of the GPIO block, but the -actual enabling/disabling of the interrupt line is done in the -main IXP4xx interrupt controller which has a 1:1 mapping for -the first 12 GPIO lines to 12 system interrupts. - -The remaining 4 GPIO lines can not be used for receiving -interrupts. - -The interrupt parent of this GPIO controller must be the -IXP4xx interrupt controller. 
- -Required properties: - -- compatible : Should be - "intel,ixp4xx-gpio" -- reg : Should contain registers location and length -- gpio-controller : marks this as a GPIO controller -- #gpio-cells : Should be 2, see gpio/gpio.txt -- interrupt-controller : marks this as an interrupt controller -- #interrupt-cells : a standard two-cell interrupt, see - interrupt-controller/interrupts.txt - -Example: - -gpio0: gpio@c8004000 { - compatible = "intel,ixp4xx-gpio"; - reg = <0xc8004000 0x1000>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; -}; diff --git a/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml b/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml new file mode 100644 index 0000000000..bfcb1f364c --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/intel,ixp4xx-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Intel IXP4xx XScale Networking Processors GPIO Controller + +description: | + This GPIO controller is found in the Intel IXP4xx + processors. It supports 16 GPIO lines. + The interrupt portions of the GPIO controller is hierarchical. + The synchronous edge detector is part of the GPIO block, but the + actual enabling/disabling of the interrupt line is done in the + main IXP4xx interrupt controller which has a 1-to-1 mapping for + the first 12 GPIO lines to 12 system interrupts. + The remaining 4 GPIO lines can not be used for receiving + interrupts. + The interrupt parent of this GPIO controller must be the + IXP4xx interrupt controller. + GPIO 14 and 15 can be used as clock outputs rather than GPIO, + and this can be enabled by a special flag. 
+ +maintainers: + - Linus Walleij + +properties: + compatible: + const: intel,ixp4xx-gpio + + reg: + maxItems: 1 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + interrupt-controller: true + + "#interrupt-cells": + const: 2 + + intel,ixp4xx-gpio14-clkout: + description: If defined, enables clock output on GPIO 14 + instead of GPIO. + type: boolean + + intel,ixp4xx-gpio15-clkout: + description: If defined, enables clock output on GPIO 15 + instead of GPIO. + type: boolean + +required: + - compatible + - reg + - "#gpio-cells" + - interrupt-controller + - "#interrupt-cells" + +additionalProperties: false + +examples: + - | + #include + gpio@c8004000 { + compatible = "intel,ixp4xx-gpio"; + reg = <0xc8004000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; diff --git a/Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml b/Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml index fb86e8ce63..cf3b1b270a 100644 --- a/Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml @@ -11,9 +11,22 @@ maintainers: properties: compatible: - enum: - - loongson,ls2k-gpio - - loongson,ls7a-gpio + oneOf: + - enum: + - loongson,ls2k-gpio + - loongson,ls2k0500-gpio0 + - loongson,ls2k0500-gpio1 + - loongson,ls2k2000-gpio0 + - loongson,ls2k2000-gpio1 + - loongson,ls2k2000-gpio2 + - loongson,ls3a5000-gpio + - loongson,ls7a-gpio + - items: + - const: loongson,ls2k1000-gpio + - const: loongson,ls2k-gpio + - items: + - const: loongson,ls7a1000-gpio + - const: loongson,ls7a-gpio reg: maxItems: 1 @@ -49,7 +62,7 @@ examples: #include gpio0: gpio@1fe00500 { - compatible = "loongson,ls2k-gpio"; + compatible = "loongson,ls2k1000-gpio", "loongson,ls2k-gpio"; reg = <0x1fe00500 0x38>; ngpios = <64>; #gpio-cells = <2>; diff --git a/Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml b/Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml new 
file mode 100644 index 0000000000..011e5b65c7 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/hwmon/adi,ltc2991.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices LTC2991 Octal I2C Voltage, Current and Temperature Monitor + +maintainers: + - Antoniu Miclaus + +description: | + The LTC2991 is used to monitor system temperatures, voltages and currents. + Through the I2C serial interface, the eight monitors can individually measure + supply voltages and can be paired for differential measurements of current + sense resistors or temperature sensing transistors. + + Datasheet: + https://www.analog.com/en/products/ltc2991.html + +properties: + compatible: + const: adi,ltc2991 + + reg: + maxItems: 1 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + + vcc-supply: true + +patternProperties: + "^channel@[0-3]$": + type: object + description: + Represents the differential/temperature channels. + + properties: + reg: + description: + The channel number. LTC2991 can monitor 4 currents/temperatures. + items: + minimum: 0 + maximum: 3 + + shunt-resistor-micro-ohms: + description: + The value of current sense resistor in micro ohms. Pin configuration is + set for differential input pair. + + adi,temperature-enable: + description: + Enables temperature readings. Pin configuration is set for remote + diode temperature measurement.
+ type: boolean + + required: + - reg + + allOf: + - if: + required: + - shunt-resistor-micro-ohms + then: + properties: + adi,temperature-enable: false + + additionalProperties: false + +required: + - compatible + - reg + - vcc-supply + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + hwmon@48 { + compatible = "adi,ltc2991"; + reg = <0x48>; + vcc-supply = <&vcc>; + }; + }; + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + hwmon@48 { + #address-cells = <1>; + #size-cells = <0>; + + compatible = "adi,ltc2991"; + reg = <0x48>; + vcc-supply = <&vcc>; + + channel@0 { + reg = <0x0>; + shunt-resistor-micro-ohms = <100000>; + }; + + channel@1 { + reg = <0x1>; + shunt-resistor-micro-ohms = <100000>; + }; + + channel@2 { + reg = <0x2>; + adi,temperature-enable; + }; + + channel@3 { + reg = <0x3>; + adi,temperature-enable; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/hwmon/adi,max31827.yaml b/Documentation/devicetree/bindings/hwmon/adi,max31827.yaml index 2dc8b07b4d..f60e06ab7d 100644 --- a/Documentation/devicetree/bindings/hwmon/adi,max31827.yaml +++ b/Documentation/devicetree/bindings/hwmon/adi,max31827.yaml @@ -32,6 +32,68 @@ properties: Must have values in the interval (1.6V; 3.6V) in order for the device to function correctly. + adi,comp-int: + description: + If present interrupt mode is used. If not present comparator mode is used + (default). + type: boolean + + adi,alarm-pol: + description: + Sets the alarms active state. + - 0 = active low + - 1 = active high + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + + adi,fault-q: + description: + Select how many consecutive temperature faults must occur before + overtemperature or undertemperature faults are indicated in the + corresponding status bits. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [1, 2, 4, 8] + + adi,timeout-enable: + description: + Enables timeout. 
Bus timeout resets the I2C-compatible interface when SCL + is low for more than 30ms (nominal). + type: boolean + +allOf: + - if: + properties: + compatible: + contains: + const: adi,max31829 + + then: + properties: + adi,alarm-pol: + default: 1 + + else: + properties: + adi,alarm-pol: + default: 0 + + - if: + properties: + compatible: + contains: + const: adi,max31827 + + then: + properties: + adi,fault-q: + default: 1 + + else: + properties: + adi,fault-q: + default: 4 + + required: - compatible - reg @@ -49,6 +111,10 @@ examples: compatible = "adi,max31827"; reg = <0x42>; vref-supply = <®_vdd>; + adi,comp-int; + adi,alarm-pol = <0>; + adi,fault-q = <1>; + adi,timeout-enable; }; }; ... diff --git a/Documentation/devicetree/bindings/hwmon/ina3221.txt b/Documentation/devicetree/bindings/hwmon/ina3221.txt deleted file mode 100644 index fa63b61714..0000000000 --- a/Documentation/devicetree/bindings/hwmon/ina3221.txt +++ /dev/null @@ -1,54 +0,0 @@ -Texas Instruments INA3221 Device Tree Bindings - -1) ina3221 node - Required properties: - - compatible: Must be "ti,ina3221" - - reg: I2C address - - Optional properties: - - ti,single-shot: This chip has two power modes: single-shot (chip takes one - measurement and then shuts itself down) and continuous ( - chip takes continuous measurements). The continuous mode is - more reliable and suitable for hardware monitor type device, - but the single-shot mode is more power-friendly and useful - for battery-powered device which cares power consumptions - while still needs some measurements occasionally. - If this property is present, the single-shot mode will be - used, instead of the default continuous one for monitoring. - - = The node contains optional child nodes for three channels = - = Each child node describes the information of input source = - - - #address-cells: Required only if a child node is present. Must be 1. - - #size-cells: Required only if a child node is present. Must be 0. 
- -2) child nodes - Required properties: - - reg: Must be 0, 1 or 2, corresponding to IN1, IN2 or IN3 port of INA3221 - - Optional properties: - - label: Name of the input source - - shunt-resistor-micro-ohms: Shunt resistor value in micro-Ohm - -Example: - -ina3221@40 { - compatible = "ti,ina3221"; - reg = <0x40>; - #address-cells = <1>; - #size-cells = <0>; - - input@0 { - reg = <0x0>; - status = "disabled"; - }; - input@1 { - reg = <0x1>; - shunt-resistor-micro-ohms = <5000>; - }; - input@2 { - reg = <0x2>; - label = "VDD_5V"; - shunt-resistor-micro-ohms = <5000>; - }; -}; diff --git a/Documentation/devicetree/bindings/hwmon/npcm750-pwm-fan.txt b/Documentation/devicetree/bindings/hwmon/npcm750-pwm-fan.txt index 8523777f56..18095ba87a 100644 --- a/Documentation/devicetree/bindings/hwmon/npcm750-pwm-fan.txt +++ b/Documentation/devicetree/bindings/hwmon/npcm750-pwm-fan.txt @@ -1,12 +1,16 @@ -Nuvoton NPCM7xx PWM and Fan Tacho controller device +Nuvoton NPCM PWM and Fan Tacho controller device The Nuvoton BMC NPCM7XX supports 8 Pulse-width modulation (PWM) controller outputs and 16 Fan tachometer controller inputs. +The Nuvoton BMC NPCM8XX supports 12 Pulse-width modulation (PWM) +controller outputs and 16 Fan tachometer controller inputs. + Required properties for pwm-fan node - #address-cells : should be 1. - #size-cells : should be 0. - compatible : "nuvoton,npcm750-pwm-fan" for Poleg NPCM7XX. + : "nuvoton,npcm845-pwm-fan" for Arbel NPCM8XX. - reg : specifies physical base address and size of the registers. - reg-names : must contain: * "pwm" for the PWM registers. 
diff --git a/Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml b/Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml new file mode 100644 index 0000000000..ded1c11576 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/hwmon/pmbus/infineon,tda38640.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Infineon TDA38640 Synchronous Buck Regulator with SVID and I2C + +maintainers: + - Naresh Solanki + +description: | + The Infineon TDA38640 is a 40A Single-voltage Synchronous Buck + Regulator with SVID and I2C designed for Industrial use. + + Datasheet: https://www.infineon.com/dgdl/Infineon-TDA38640-0000-DataSheet-v02_04-EN.pdf?fileId=8ac78c8c80027ecd018042f2337f00c9 + +properties: + compatible: + enum: + - infineon,tda38640 + + reg: + maxItems: 1 + + infineon,en-pin-fixed-level: + description: + Indicates that the chip EN pin is at fixed level or left + unconnected(has internal pull-down). 
+ type: boolean + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + tda38640@40 { + compatible = "infineon,tda38640"; + reg = <0x40>; + }; + }; diff --git a/Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml b/Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml index 8648877d2d..378d1f6aee 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml @@ -26,6 +26,7 @@ properties: - ti,ina226 - ti,ina230 - ti,ina231 + - ti,ina237 - ti,ina238 reg: diff --git a/Documentation/devicetree/bindings/hwmon/ti,ina3221.yaml b/Documentation/devicetree/bindings/hwmon/ti,ina3221.yaml new file mode 100644 index 0000000000..5f10f1207d --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/ti,ina3221.yaml @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/hwmon/ti,ina3221.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments INA3221 Current and Voltage Monitor + +maintainers: + - Jean Delvare + - Guenter Roeck + +properties: + compatible: + const: ti,ina3221 + + reg: + maxItems: 1 + + ti,single-shot: + description: | + This chip has two power modes: single-shot (chip takes one measurement + and then shuts itself down) and continuous (chip takes continuous + measurements). The continuous mode is more reliable and suitable for + hardware monitor type device, but the single-shot mode is more power- + friendly and useful for battery-powered device which cares power + consumptions while still needs some measurements occasionally. + + If this property is present, the single-shot mode will be used, instead + of the default continuous one for monitoring. + $ref: /schemas/types.yaml#/definitions/flag + + "#address-cells": + description: Required only if a child node is present. 
+ const: 1 + + "#size-cells": + description: Required only if a child node is present. + const: 0 + +patternProperties: + "^input@[0-2]$": + description: The node contains optional child nodes for three channels. + Each child node describes the information of input source. Input channels + default to enabled in the chip. Unless channels are explicitly disabled + in device-tree, input channels will be enabled. + type: object + additionalProperties: false + properties: + reg: + description: Must be 0, 1 or 2, corresponding to the IN1, IN2 or IN3 + ports of the INA3221, respectively. + enum: [ 0, 1, 2 ] + + label: + description: name of the input source + + shunt-resistor-micro-ohms: + description: shunt resistor value in micro-Ohm + + ti,summation-disable: + description: | + The INA3221 has a critical alert pin that can be controlled by the + summation control function. This function adds the single + shunt-voltage conversions for the desired channels in order to + compare the combined sum to the programmed limit. The Shunt-Voltage + Sum Limit register contains the programmed value that is compared + to the value in the Shunt-Voltage Sum register in order to + determine if the total summed limit is exceeded. If the + shunt-voltage sum limit value is exceeded, the critical alert pin + is asserted. + + For the summation limit to have a meaningful value, it is necessary + to use the same shunt-resistor value on all enabled channels. If + this is not the case or if a channel should not be used for + triggering the critical alert pin, then this property can be used + to exclude specific channels from the summation control function.
+ type: boolean + + required: + - reg + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + power-sensor@40 { + compatible = "ti,ina3221"; + reg = <0x40>; + #address-cells = <1>; + #size-cells = <0>; + + input@0 { + reg = <0x0>; + /* + * Input channels are enabled by default in the device and so + * to disable, must be explicitly disabled in device-tree. + */ + status = "disabled"; + }; + + input@1 { + reg = <0x1>; + shunt-resistor-micro-ohms = <5000>; + }; + + input@2 { + reg = <0x2>; + label = "VDD_5V"; + shunt-resistor-micro-ohms = <5000>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt deleted file mode 100644 index 86b2e433a9..0000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.txt +++ /dev/null @@ -1,135 +0,0 @@ -Pinctrl-based I2C Bus DeMux - -This binding describes an I2C bus demultiplexer that uses pin multiplexing to -route the I2C signals, and represents the pin multiplexing configuration using -the pinctrl device tree bindings. This may be used to select one I2C IP core at -runtime which may have a better feature set for a given task than another I2C -IP core on the SoC. The most simple example is to fall back to GPIO bitbanging -if your current runtime configuration hits an errata of the internal IP core. - - +-------------------------------+ - | SoC | - | | +-----+ +-----+ - | +------------+ | | dev | | dev | - | |I2C IP Core1|--\ | +-----+ +-----+ - | +------------+ \-------+ | | | - | |Pinctrl|--|------+--------+ - | +------------+ +-------+ | - | |I2C IP Core2|--/ | - | +------------+ | - | | - +-------------------------------+ - -Required properties: -- compatible: "i2c-demux-pinctrl" -- i2c-parent: List of phandles of I2C masters available for selection. The first - one will be used as default. -- i2c-bus-name: The name of this bus. 
Also needed as pinctrl-name for the I2C - parents. - -Furthermore, I2C mux properties and child nodes. See i2c-mux.yaml in this -directory. - -Example: - -Here is a snipplet for a bus to be demuxed. It contains various i2c clients for -HDMI, so the bus is named "i2c-hdmi": - - i2chdmi: i2c@8 { - - compatible = "i2c-demux-pinctrl"; - i2c-parent = <&gpioi2c>, <&iic2>, <&i2c2>; - i2c-bus-name = "i2c-hdmi"; - #address-cells = <1>; - #size-cells = <0>; - - ak4643: sound-codec@12 { - compatible = "asahi-kasei,ak4643"; - - #sound-dai-cells = <0>; - reg = <0x12>; - }; - - composite-in@20 { - compatible = "adi,adv7180"; - reg = <0x20>; - remote = <&vin1>; - - port { - adv7180: endpoint { - bus-width = <8>; - remote-endpoint = <&vin1ep0>; - }; - }; - }; - - hdmi@39 { - compatible = "adi,adv7511w"; - reg = <0x39>; - interrupt-parent = <&gpio1>; - interrupts = <15 IRQ_TYPE_LEVEL_LOW>; - - adi,input-depth = <8>; - adi,input-colorspace = "rgb"; - adi,input-clock = "1x"; - adi,input-style = <1>; - adi,input-justification = "evenly"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - adv7511_in: endpoint { - remote-endpoint = <&du_out_lvds0>; - }; - }; - - port@1 { - reg = <1>; - adv7511_out: endpoint { - remote-endpoint = <&hdmi_con>; - }; - }; - }; - }; - }; - -And for clarification, here are the snipplets for the i2c-parents: - - gpioi2c: i2c@9 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "i2c-gpio"; - gpios = <&gpio5 6 GPIO_ACTIVE_HIGH /* sda */ - &gpio5 5 GPIO_ACTIVE_HIGH /* scl */ - >; - i2c-gpio,delay-us = <5>; - }; - -... - -&i2c2 { - pinctrl-0 = <&i2c2_pins>; - pinctrl-names = "i2c-hdmi"; - - clock-frequency = <100000>; -}; - -... - -&iic2 { - pinctrl-0 = <&iic2_pins>; - pinctrl-names = "i2c-hdmi"; - - clock-frequency = <100000>; -}; - -Please note: - -- pinctrl properties for the parent I2C controllers need a pinctrl state - with the same name as i2c-bus-name, not "default"! 
- -- the i2c masters must have their status "disabled". This driver will - enable them at runtime when needed. diff --git a/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml new file mode 100644 index 0000000000..2c08f2a7cf --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml @@ -0,0 +1,172 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/i2c-demux-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Pinctrl-based I2C Bus Demultiplexer + +maintainers: + - Wolfram Sang + +description: | + This binding describes an I2C bus demultiplexer that uses pin multiplexing to + route the I2C signals, and represents the pin multiplexing configuration + using the pinctrl device tree bindings. This may be used to select one I2C + IP core at runtime which may have a better feature set for a given task than + another I2C IP core on the SoC. The most simple example is to fall back to + GPIO bitbanging if your current runtime configuration hits an errata of the + internal IP core. + + +-------------------------------+ + | SoC | + | | +-----+ +-----+ + | +------------+ | | dev | | dev | + | |I2C IP Core1|--\ | +-----+ +-----+ + | +------------+ \-------+ | | | + | |Pinctrl|--|------+--------+ + | +------------+ +-------+ | + | |I2C IP Core2|--/ | + | +------------+ | + | | + +-------------------------------+ + +allOf: + - $ref: i2c-mux.yaml + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: i2c-demux-pinctrl + + i2c-parent: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + List of phandles of I2C masters available for selection. The first one + will be used as default. + + i2c-bus-name: + $ref: /schemas/types.yaml#/definitions/string + description: + The name of this bus. Also needed as pinctrl-name for the I2C parents. 
+ +required: + - compatible + - i2c-parent + - i2c-bus-name + +unevaluatedProperties: false + +examples: + - | + #include + #include + + gpioi2c2: i2c-9 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "i2c-gpio"; + scl-gpios = <&gpio5 5 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio5 6 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + i2c-gpio,delay-us = <5>; + + // The I2C controller must have its status "disabled". The I2C bus + // demultiplexer will enable it at runtime when needed. + status = "disabled"; + }; + + iic2: i2c@e6520000 { + reg = <0xe6520000 0x425>; + pinctrl-0 = <&iic2_pins>; + // The pinctrl property for the parent I2C controller needs a pinctrl + // state with the same name as i2c-bus-name in the I2C bus demultiplexer + // node, not "default"! + pinctrl-names = "i2c-hdmi"; + + clock-frequency = <100000>; + + // The I2C controller must have its status "disabled". The I2C bus + // demultiplexer will enable it at runtime when needed. + status = "disabled"; + }; + + i2c2: i2c@e6530000 { + reg = <0 0xe6530000 0 0x40>; + pinctrl-0 = <&i2c2_pins>; + // The pinctrl property for the parent I2C controller needs a pinctrl + // state with the same name as i2c-bus-name in the I2C bus demultiplexer + // node, not "default"! + pinctrl-names = "i2c-hdmi"; + + clock-frequency = <100000>; + + // The I2C controller must have its status "disabled". The I2C bus + // demultiplexer will enable it at runtime when needed. + status = "disabled"; + }; + + // Example for a bus to be demuxed. 
It contains various I2C clients for + // HDMI, so the bus is named "i2c-hdmi": + i2chdmi: i2c-mux3 { + compatible = "i2c-demux-pinctrl"; + i2c-parent = <&iic2>, <&i2c2>, <&gpioi2c2>; + i2c-bus-name = "i2c-hdmi"; + #address-cells = <1>; + #size-cells = <0>; + + ak4643: codec@12 { + compatible = "asahi-kasei,ak4643"; + #sound-dai-cells = <0>; + reg = <0x12>; + }; + + composite-in@20 { + compatible = "adi,adv7180"; + reg = <0x20>; + + port { + adv7180: endpoint { + bus-width = <8>; + remote-endpoint = <&vin1ep0>; + }; + }; + }; + + hdmi@39 { + compatible = "adi,adv7511w"; + reg = <0x39>; + interrupt-parent = <&gpio1>; + interrupts = <15 IRQ_TYPE_LEVEL_LOW>; + clocks = <&cec_clock>; + clock-names = "cec"; + + avdd-supply = <&fixedregulator1v8>; + dvdd-supply = <&fixedregulator1v8>; + pvdd-supply = <&fixedregulator1v8>; + dvdd-3v-supply = <&fixedregulator3v3>; + bgvdd-supply = <&fixedregulator1v8>; + + adi,input-depth = <8>; + adi,input-colorspace = "rgb"; + adi,input-clock = "1x"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + adv7511_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + reg = <1>; + adv7511_out: endpoint { + remote-endpoint = <&hdmi_con_out>; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml index 042d4dc636..8386cfe215 100644 --- a/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml +++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml @@ -25,6 +25,7 @@ properties: - items: - enum: + - qcom,sc7280-cci - qcom,sdm845-cci - qcom,sm6350-cci - qcom,sm8250-cci @@ -159,6 +160,7 @@ allOf: compatible: contains: enum: + - qcom,sc7280-cci - qcom,sm8250-cci - qcom,sm8450-cci then: diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index ab69f4115d..c816e295d5 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ 
b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -55,6 +55,12 @@ properties: May not be supported by all controllers. + mctp-controller: + type: boolean + description: | + Indicates that the system is accessible via this bus as an endpoint for + MCTP over I3C transport. + required: - "#address-cells" - "#size-cells" @@ -119,12 +125,12 @@ patternProperties: minimum: 0 maximum: 0x7f - description: | - First half of the Provisional ID (following the PID + First half of the Provisioned ID (following the PID definition provided by the I3C specification). Contains the manufacturer ID left-shifted by 1. - description: | - Second half of the Provisional ID (following the PID + Second half of the Provisioned ID (following the PID definition provided by the I3C specification). Contains the ORing of the part ID left-shifted by 16, diff --git a/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml index 986df1a6ff..66ea894dbe 100644 --- a/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml +++ b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml @@ -4,19 +4,23 @@ $id: http://devicetree.org/schemas/iio/accel/kionix,kx022a.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: ROHM/Kionix KX022A Accelerometer +title: ROHM/Kionix KX022A, KX132-1211 and KX132ACR-LBZ Accelerometers maintainers: - Matti Vaittinen description: | - KX022A is a 3-axis accelerometer supporting +/- 2G, 4G, 8G and 16G ranges, - output data-rates from 0.78Hz to 1600Hz and a hardware-fifo buffering. - KX022A can be accessed either via I2C or SPI. + KX022A, KX132ACR-LBZ and KX132-1211 are 3-axis accelerometers supporting + +/- 2G, 4G, 8G and 16G ranges, variable output data-rates and a + hardware-fifo buffering. These accelerometers can be accessed either + via I2C or SPI. 
properties: compatible: - const: kionix,kx022a + enum: + - kionix,kx022a + - kionix,kx132-1211 + - rohm,kx132acr-lbz reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2497.yaml b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2497.yaml index 875f394576..5cc6a96840 100644 --- a/Documentation/devicetree/bindings/iio/adc/lltc,ltc2497.yaml +++ b/Documentation/devicetree/bindings/iio/adc/lltc,ltc2497.yaml @@ -4,21 +4,31 @@ $id: http://devicetree.org/schemas/iio/adc/lltc,ltc2497.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Linear Technology / Analog Devices LTC2497 ADC +title: Linear Technology / Analog Devices LTC2497 and LTC2309 ADC maintainers: - Michael Hennerich + - Liam Beguin description: | - 16bit ADC supporting up to 16 single ended or 8 differential inputs. - I2C interface. + LTC2309: + low noise, low power, 8-channel, 12-bit successive approximation ADC with an + I2C compatible serial interface. - https://www.analog.com/media/en/technical-documentation/data-sheets/2497fb.pdf - https://www.analog.com/media/en/technical-documentation/data-sheets/2499fe.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/2309fd.pdf + + LTC2497: + LTC2499: + 16bit ADC supporting up to 16 single ended or 8 differential inputs. + I2C interface. 
+ + https://www.analog.com/media/en/technical-documentation/data-sheets/2497fb.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/2499fe.pdf properties: compatible: enum: + - lltc,ltc2309 - lltc,ltc2497 - lltc,ltc2499 diff --git a/Documentation/devicetree/bindings/iio/adc/microchip,mcp3564.yaml b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3564.yaml new file mode 100644 index 0000000000..6753192761 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3564.yaml @@ -0,0 +1,205 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/microchip,mcp3564.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip MCP346X and MCP356X ADC Family + +maintainers: + - Marius Cristea + +description: | + Bindings for the Microchip family of 153.6 ksps, Low-Noise 16/24-Bit + Delta-Sigma ADCs with an SPI interface. Datasheet can be found here: + Datasheet for MCP3561, MCP3562, MCP3564 can be found here: + https://ww1.microchip.com/downloads/aemDocuments/documents/MSLD/ProductDocuments/DataSheets/MCP3561-2-4-Family-Data-Sheet-DS20006181C.pdf + Datasheet for MCP3561R, MCP3562R, MCP3564R can be found here: + https://ww1.microchip.com/downloads/aemDocuments/documents/APID/ProductDocuments/DataSheets/MCP3561_2_4R-Data-Sheet-DS200006391C.pdf + Datasheet for MCP3461, MCP3462, MCP3464 can be found here: + https://ww1.microchip.com/downloads/aemDocuments/documents/APID/ProductDocuments/DataSheets/MCP3461-2-4-Two-Four-Eight-Channel-153.6-ksps-Low-Noise-16-Bit-Delta-Sigma-ADC-Data-Sheet-20006180D.pdf + Datasheet for MCP3461R, MCP3462R, MCP3464R can be found here: + https://ww1.microchip.com/downloads/aemDocuments/documents/APID/ProductDocuments/DataSheets/MCP3461-2-4R-Family-Data-Sheet-DS20006404C.pdf + +properties: + compatible: + enum: + - microchip,mcp3461 + - microchip,mcp3462 + - microchip,mcp3464 + - microchip,mcp3461r + - microchip,mcp3462r + - 
microchip,mcp3464r + - microchip,mcp3561 + - microchip,mcp3562 + - microchip,mcp3564 + - microchip,mcp3561r + - microchip,mcp3562r + - microchip,mcp3564r + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 20000000 + + spi-cpha: true + + spi-cpol: true + + vdd-supply: true + + avdd-supply: true + + clocks: + description: + Phandle and clock identifier for external sampling clock. + If not specified, the internal crystal oscillator will be used. + maxItems: 1 + + interrupts: + description: IRQ line of the ADC + maxItems: 1 + + drive-open-drain: + description: + Whether to drive the IRQ signal as push-pull (default) or open-drain. Note + that the device requires this pin to become "high", otherwise it will stop + converting. + type: boolean + + vref-supply: + description: + Some devices have a specific reference voltage supplied on a different + pin to the other supplies. Needed to be able to establish channel scaling + unless there is also an internal reference available (e.g. mcp3564r). In + case of "r" devices (e.g. mcp3564r), if it does not exist the internal + reference will be used. + + microchip,hw-device-address: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + description: + The address is set on a per-device basis by fuses in the factory, + configured on request. If not requested, the fuses are set for 0x1. + The device address is part of the device markings to avoid + potential confusion. This address is coded on two bits, so four possible + addresses are available when multiple devices are present on the same + SPI bus with only one Chip Select line for all devices. + Each device communication starts by a CS falling edge, followed by the + clocking of the device address (BITS[7:6] - top two bits of COMMAND BYTE + which is first one on the wire).
+ + "#io-channel-cells": + const: 1 + + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + +patternProperties: + "^channel@([0-9]|([1-7][0-9]))$": + $ref: adc.yaml + type: object + unevaluatedProperties: false + description: Represents the external channels which are connected to the ADC. + + properties: + reg: + description: The channel number in single-ended and differential mode. + minimum: 0 + maximum: 79 + + required: + - reg + +dependencies: + spi-cpol: [ spi-cpha ] + spi-cpha: [ spi-cpol ] + +required: + - compatible + - reg + - microchip,hw-device-address + - spi-max-frequency + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - # External vref, no internal reference + if: + properties: + compatible: + contains: + enum: + - microchip,mcp3461 + - microchip,mcp3462 + - microchip,mcp3464 + - microchip,mcp3561 + - microchip,mcp3562 + - microchip,mcp3564 + then: + required: + - vref-supply + +unevaluatedProperties: false + +examples: + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + adc@0 { + compatible = "microchip,mcp3564r"; + reg = <0>; + vref-supply = <&vref_reg>; + spi-cpha; + spi-cpol; + spi-max-frequency = <10000000>; + microchip,hw-device-address = <1>; + + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + /* CH0 to AGND */ + reg = <0>; + label = "CH0"; + }; + + channel@1 { + /* CH1 to AGND */ + reg = <1>; + label = "CH1"; + }; + + /* diff-channels */ + channel@11 { + reg = <11>; + + /* CN0, CN1 */ + diff-channels = <0 1>; + label = "CH0_CH1"; + }; + + channel@22 { + reg = <0x22>; + + /* CN1, CN2 */ + diff-channels = <1 2>; + label = "CH1_CH3"; + }; + + channel@23 { + reg = <0x23>; + + /* CN1, CN3 */ + diff-channels = <1 3>; + label = "CH1_CH3"; + }; + }; + }; +... 
diff --git a/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml index f7b3fde411..06951ec5f5 100644 --- a/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml +++ b/Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml @@ -18,7 +18,13 @@ description: | properties: compatible: enum: + - microchip,mcp3910 - microchip,mcp3911 + - microchip,mcp3912 + - microchip,mcp3913 + - microchip,mcp3914 + - microchip,mcp3918 + - microchip,mcp3919 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml b/Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml index e004659099..d605999ffe 100644 --- a/Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml +++ b/Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml @@ -23,6 +23,9 @@ properties: reg: maxItems: 1 + interrupts: + maxItems: 1 + "#address-cells": const: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/ti,twl6030-gpadc.yaml b/Documentation/devicetree/bindings/iio/adc/ti,twl6030-gpadc.yaml new file mode 100644 index 0000000000..e779a8986e --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/ti,twl6030-gpadc.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/adc/ti,twl6030-gpadc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPADC subsystem in the TWL6030 power module + +maintainers: + - Andreas Kemnade + +description: + The GPADC subsystem in the TWL603X consists of a 10-bit ADC + combined with a 15-input analog multiplexer in the TWL6030 resp. a + 19-input analog multiplexer in the TWL6032.
+ +properties: + compatible: + enum: + - ti,twl6030-gpadc + - ti,twl6032-gpadc + + interrupts: + maxItems: 1 + + "#io-channel-cells": + const: 1 + +required: + - compatible + - interrupts + - "#io-channel-cells" + +additionalProperties: false + +examples: + - | + gpadc { + compatible = "ti,twl6030-gpadc"; + interrupts = <3>; + #io-channel-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml b/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml index 9fda56fa49..2ee6080dea 100644 --- a/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml +++ b/Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml @@ -4,20 +4,26 @@ $id: http://devicetree.org/schemas/iio/amplifiers/adi,hmc425a.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: HMC425A 6-bit Digital Step Attenuator +title: Analog Devices HMC425A and similar Digital Step Attenuators maintainers: - Michael Hennerich description: | - Digital Step Attenuator IIO device with gpio interface. + Digital Step Attenuator IIO devices with gpio interface. + Offer various frequency and attenuation ranges. 
HMC425A 0.5 dB LSB GaAs MMIC 6-BIT DIGITAL POSITIVE CONTROL ATTENUATOR, 2.2 - 8.0 GHz - https://www.analog.com/media/en/technical-documentation/data-sheets/hmc425A.pdf + https://www.analog.com/media/en/technical-documentation/data-sheets/hmc425A.pdf + + HMC540S 1 dB LSB Silicon MMIC 4-Bit Digital Positive Control Attenuator, 0.1 - 8 GHz + https://www.analog.com/media/en/technical-documentation/data-sheets/hmc540s.pdf + properties: compatible: enum: - adi,hmc425a + - adi,hmc540s vcc-supply: true diff --git a/Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml b/Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml index 1db6952ddc..297b8a1a7f 100644 --- a/Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml +++ b/Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml @@ -48,6 +48,11 @@ properties: mount-matrix: true + invensense,level-shifter: + type: boolean + description: | + From ancient platform data struct: false: VLogic, true: VDD + i2c-gate: $ref: /schemas/i2c/i2c-controller.yaml unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml index ee8724ad33..28b667a9cb 100644 --- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml +++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml @@ -93,6 +93,9 @@ properties: wakeup-source: $ref: /schemas/types.yaml#/definitions/flag + mount-matrix: + description: an optional 3x3 mounting rotation matrix + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/iio/pressure/rohm,bm1390.yaml b/Documentation/devicetree/bindings/iio/pressure/rohm,bm1390.yaml new file mode 100644 index 0000000000..7c4ca6322b --- /dev/null +++ b/Documentation/devicetree/bindings/iio/pressure/rohm,bm1390.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: 
http://devicetree.org/schemas/iio/pressure/rohm,bm1390.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ROHM BM1390 pressure sensor + +maintainers: + - Matti Vaittinen + +description: + BM1390GLV-Z is a pressure sensor which performs internal temperature + compensation for the MEMS. Pressure range is from 300 hPa to 1300 hPa + and sample averaging and IIR filtering is built in. Temperature + measurement is also supported. + +properties: + compatible: + const: rohm,bm1390glv-z + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + vdd-supply: true + +required: + - compatible + - reg + - vdd-supply + +additionalProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + pressure-sensor@5d { + compatible = "rohm,bm1390glv-z"; + reg = <0x5d>; + + interrupt-parent = <&gpio1>; + interrupts = <29 IRQ_TYPE_LEVEL_LOW>; + + vdd-supply = <&vdd>; + }; + }; diff --git a/Documentation/devicetree/bindings/iio/resolver/adi,ad2s1210.yaml b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s1210.yaml new file mode 100644 index 0000000000..8980b3cd83 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s1210.yaml @@ -0,0 +1,177 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/resolver/adi,ad2s1210.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices AD2S1210 Resolver-to-Digital Converter + +maintainers: + - Michael Hennerich + +description: | + The AD2S1210 is a complete 10-bit to 16-bit resolution tracking + resolver-to-digital converter, integrating an on-board programmable + sinusoidal oscillator that provides sine wave excitation for + resolvers. + + The AD2S1210 allows the user to read the angular position or the + angular velocity data directly from the parallel outputs or through + the serial interface. 
+ + The mode of operation of the communication channel (parallel or serial) is + selected by the A0 and A1 input pins. In normal mode, data is latched by + toggling the SAMPLE line and can then be read directly. In configuration mode, + data is read or written using a register access scheme (address byte with + read/write flag and data byte). + + A1 A0 Result + 0 0 Normal mode - position output + 0 1 Normal mode - velocity output + 1 0 Reserved + 1 1 Configuration mode + + In normal mode, the resolution of the digital output is selected using + the RES0 and RES1 input pins. In configuration mode, the resolution is + selected by setting the RES0 and RES1 bits in the control register. + + RES1 RES0 Resolution (Bits) + 0 0 10 + 0 1 12 + 1 0 14 + 1 1 16 + + Note on SPI connections: The CS line on the AD2S1210 should be hard-wired to + logic low and the WR/FSYNC line on the AD2S1210 should be connected to the + SPI CSn output of the SPI controller. + + Datasheet: + https://www.analog.com/media/en/technical-documentation/data-sheets/ad2s1210.pdf + +properties: + compatible: + const: adi,ad2s1210 + + reg: + maxItems: 1 + + spi-max-frequency: + maximum: 25000000 + + spi-cpha: true + + avdd-supply: + description: + A 4.75 to 5.25 V regulator that powers the Analog Supply Voltage (AVDD) + pin. + + dvdd-supply: + description: + A 4.75 to 5.25 V regulator that powers the Digital Supply Voltage (DVDD) + pin. + + vdrive-supply: + description: + A 2.3 to 5.25 V regulator that powers the Logic Power Supply Input + (VDrive) pin. + + clocks: + maxItems: 1 + description: External oscillator clock (CLKIN). + + reset-gpios: + description: + GPIO connected to the /RESET pin. As the line needs to be low for the + reset to be active, it should be configured as GPIO_ACTIVE_LOW. + maxItems: 1 + + sample-gpios: + description: + GPIO connected to the /SAMPLE pin. As the line needs to be low to trigger + a sample, it should be configured as GPIO_ACTIVE_LOW.
+ maxItems: 1 + + mode-gpios: + description: + GPIO lines connected to the A0 and A1 pins. These pins select the data + transfer mode. + minItems: 2 + maxItems: 2 + + resolution-gpios: + description: + GPIO lines connected to the RES0 and RES1 pins. These pins select the + resolution of the digital output. If omitted, it is assumed that the + RES0 and RES1 pins are hard-wired to match the assigned-resolution-bits + property. + minItems: 2 + maxItems: 2 + + fault-gpios: + description: + GPIO lines connected to the LOT and DOS pins. These pins combined indicate + the type of fault present, if any. As these pins are pulled low to indicate + a fault condition, they should be configured as GPIO_ACTIVE_LOW. + minItems: 2 + maxItems: 2 + + adi,fixed-mode: + description: + This is used to indicate the selected mode if A0 and A1 are hard-wired + instead of connected to GPIOS (i.e. mode-gpios is omitted). + $ref: /schemas/types.yaml#/definitions/string + enum: [config, velocity, position] + + assigned-resolution-bits: + description: + Resolution of the digital output required by the application. This + determines the precision of the angle and/or the maximum speed that can + be measured. If resolution-gpios is omitted, it is assumed that RES0 and + RES1 are hard-wired to match this value.
+ enum: [10, 12, 14, 16] + +required: + - compatible + - reg + - spi-cpha + - avdd-supply + - dvdd-supply + - vdrive-supply + - clocks + - sample-gpios + - assigned-resolution-bits + +oneOf: + - required: + - mode-gpios + - required: + - adi,fixed-mode + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + resolver@0 { + compatible = "adi,ad2s1210"; + reg = <0>; + spi-max-frequency = <20000000>; + spi-cpha; + avdd-supply = <&avdd_regulator>; + dvdd-supply = <&dvdd_regulator>; + vdrive-supply = <&vdrive_regulator>; + clocks = <&ext_osc>; + sample-gpios = <&gpio0 90 GPIO_ACTIVE_LOW>; + mode-gpios = <&gpio0 86 0>, <&gpio0 87 0>; + resolution-gpios = <&gpio0 88 0>, <&gpio0 89 0>; + assigned-resolution-bits = <16>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml index e5a3c355ee..29921aab9d 100644 --- a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml +++ b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml @@ -24,6 +24,8 @@ properties: linux,keycodes: maxItems: 1 + wakeup-source: true + required: - compatible - linux,keycodes diff --git a/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml b/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml new file mode 100644 index 0000000000..88764adcd6 --- /dev/null +++ b/Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/qcom,pm8921-keypad.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm PM8921 PMIC KeyPad + +maintainers: + - Dmitry Baryshkov + +allOf: + - $ref: input.yaml# + - $ref: matrix-keymap.yaml# + +properties: + compatible: + enum: + - qcom,pm8058-keypad + - qcom,pm8921-keypad + + reg: + maxItems: 1 
+ + interrupts: + items: + - description: key sense + - description: key stuck + + wakeup-source: + type: boolean + description: use any event on keypad as wakeup event + + linux,keypad-wakeup: + type: boolean + deprecated: true + description: legacy version of the wakeup-source property + + debounce: + description: + Time in microseconds that key must be pressed or + released for state change interrupt to trigger. + $ref: /schemas/types.yaml#/definitions/uint32 + + scan-delay: + $ref: /schemas/types.yaml#/definitions/uint32 + description: time in microseconds to pause between successive scans of the + matrix array + + row-hold: + $ref: /schemas/types.yaml#/definitions/uint32 + description: time in nanoseconds to pause between scans of each row in the + matrix array. + +required: + - compatible + - reg + - interrupts + - linux,keymap + +unevaluatedProperties: false + +examples: + - | + #include + #include + pmic { + #address-cells = <1>; + #size-cells = <0>; + + keypad@148 { + compatible = "qcom,pm8921-keypad"; + reg = <0x148>; + interrupt-parent = <&pmicintc>; + interrupts = <74 IRQ_TYPE_EDGE_RISING>, <75 IRQ_TYPE_EDGE_RISING>; + linux,keymap = < + MATRIX_KEY(0, 0, KEY_VOLUMEUP) + MATRIX_KEY(0, 1, KEY_VOLUMEDOWN) + MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS) + MATRIX_KEY(0, 3, KEY_CAMERA) + >; + keypad,num-rows = <1>; + keypad,num-columns = <5>; + debounce = <15>; + scan-delay = <32>; + row-hold = <91500>; + }; + }; +... 
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt b/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt deleted file mode 100644 index 4a9dc6ba96..0000000000 --- a/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt +++ /dev/null @@ -1,90 +0,0 @@ -Qualcomm PM8xxx PMIC Keypad - -PROPERTIES - -- compatible: - Usage: required - Value type: - Definition: must be one of: - "qcom,pm8058-keypad" - "qcom,pm8921-keypad" - -- reg: - Usage: required - Value type: - Definition: address of keypad control register - -- interrupts: - Usage: required - Value type: - Definition: the first interrupt specifies the key sense interrupt - and the second interrupt specifies the key stuck interrupt. - The format of the specifier is defined by the binding - document describing the node's interrupt parent. - -- linux,keymap: - Usage: required - Value type: - Definition: the linux keymap. More information can be found in - input/matrix-keymap.txt. - -- linux,keypad-no-autorepeat: - Usage: optional - Value type: - Definition: don't enable autorepeat feature. - -- wakeup-source: - Usage: optional - Value type: - Definition: use any event on keypad as wakeup event. - (Legacy property supported: "linux,keypad-wakeup") - -- keypad,num-rows: - Usage: required - Value type: - Definition: number of rows in the keymap. More information can be found - in input/matrix-keymap.txt. - -- keypad,num-columns: - Usage: required - Value type: - Definition: number of columns in the keymap. More information can be - found in input/matrix-keymap.txt. - -- debounce: - Usage: optional - Value type: - Definition: time in microseconds that key must be pressed or release - for key sense interrupt to trigger. - -- scan-delay: - Usage: optional - Value type: - Definition: time in microseconds to pause between successive scans - of the matrix array. 
- -- row-hold: - Usage: optional - Value type: - Definition: time in nanoseconds to pause between scans of each row in - the matrix array. - -EXAMPLE - - keypad@148 { - compatible = "qcom,pm8921-keypad"; - reg = <0x148>; - interrupt-parent = <&pmicintc>; - interrupts = <74 1>, <75 1>; - linux,keymap = < - MATRIX_KEY(0, 0, KEY_VOLUMEUP) - MATRIX_KEY(0, 1, KEY_VOLUMEDOWN) - MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS) - MATRIX_KEY(0, 3, KEY_CAMERA) - >; - keypad,num-rows = <1>; - keypad,num-columns = <5>; - debounce = <15>; - scan-delay = <32>; - row-hold = <91500>; - }; diff --git a/Documentation/devicetree/bindings/input/syna,rmi4.yaml b/Documentation/devicetree/bindings/input/syna,rmi4.yaml index 4d4e1a8e36..b522c8d3ce 100644 --- a/Documentation/devicetree/bindings/input/syna,rmi4.yaml +++ b/Documentation/devicetree/bindings/input/syna,rmi4.yaml @@ -164,6 +164,8 @@ patternProperties: "^rmi4-f[0-9a-f]+@[0-9a-f]+$": type: object + additionalProperties: true + description: Other functions, not documented yet. diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml index 4080422a9e..037e5d3c44 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,tt21000.yaml @@ -34,6 +34,9 @@ properties: vdd-supply: description: Regulator for voltage. + vddio-supply: + description: Optional Regulator for I/O voltage. + reset-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt index f5021214ed..6c201a2ba8 100644 --- a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt +++ b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt @@ -1,7 +1,7 @@ Texas Instruments TWL family (twl4030) pwrbutton module This module is part of the TWL4030. 
For more details about the whole -chip see Documentation/devicetree/bindings/mfd/twl-family.txt. +chip see Documentation/devicetree/bindings/mfd/ti,twl.yaml. This module provides a simple power button event via an Interrupt. diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml new file mode 100644 index 0000000000..fd15ab5014 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,msm8939.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm MSM8939 Network-On-Chip interconnect + +maintainers: + - Konrad Dybcio + +description: | + The Qualcomm MSM8939 interconnect providers support adjusting the + bandwidth requirements between the various NoC fabrics. + +allOf: + - $ref: qcom,rpm-common.yaml# + +properties: + compatible: + enum: + - qcom,msm8939-bimc + - qcom,msm8939-pcnoc + - qcom,msm8939-snoc + + reg: + maxItems: 1 + +patternProperties: + '^interconnect-[a-z0-9\-]+$': + type: object + $ref: qcom,rpm-common.yaml# + description: + The interconnect providers do not have a separate QoS register space, + but share parent's space. 
+ + allOf: + - $ref: qcom,rpm-common.yaml# + + properties: + compatible: + const: qcom,msm8939-snoc-mm + + required: + - compatible + + unevaluatedProperties: false + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + + snoc: interconnect@580000 { + compatible = "qcom,msm8939-snoc"; + reg = <0x00580000 0x14000>; + #interconnect-cells = <1>; + }; + + bimc: interconnect@400000 { + compatible = "qcom,msm8939-bimc"; + reg = <0x00400000 0x62000>; + #interconnect-cells = <1>; + + snoc_mm: interconnect-snoc { + compatible = "qcom,msm8939-snoc-mm"; + #interconnect-cells = <1>; + }; + }; diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8996.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8996.yaml new file mode 100644 index 0000000000..e3f964aaad --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8996.yaml @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,msm8996.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm MSM8996 Network-On-Chip interconnect + +maintainers: + - Konrad Dybcio + +description: | + The Qualcomm MSM8996 interconnect providers support adjusting the + bandwidth requirements between the various NoC fabrics. + +properties: + compatible: + enum: + - qcom,msm8996-a0noc + - qcom,msm8996-a1noc + - qcom,msm8996-a2noc + - qcom,msm8996-bimc + - qcom,msm8996-cnoc + - qcom,msm8996-mnoc + - qcom,msm8996-pnoc + - qcom,msm8996-snoc + + reg: + maxItems: 1 + + clock-names: + minItems: 1 + maxItems: 3 + + clocks: + minItems: 1 + maxItems: 3 + + power-domains: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +allOf: + - $ref: qcom,rpm-common.yaml# + - if: + properties: + compatible: + const: qcom,msm8996-a0noc + + then: + properties: + clocks: + items: + - description: Aggregate0 System NoC AXI Clock. 
+ - description: Aggregate0 Config NoC AHB Clock. + - description: Aggregate0 NoC MPU Clock. + + clock-names: + items: + - const: aggre0_snoc_axi + - const: aggre0_cnoc_ahb + - const: aggre0_noc_mpu_cfg + + required: + - power-domains + + - if: + properties: + compatible: + const: qcom,msm8996-mnoc + + then: + properties: + clocks: + items: + - description: CPU-NoC High-performance Bus Clock. + + clock-names: + const: iface + + - if: + properties: + compatible: + const: qcom,msm8996-a2noc + + then: + properties: + clocks: + items: + - description: Aggregate2 NoC UFS AXI Clock + - description: UFS AXI Clock + + clock-names: + items: + - const: aggre2_ufs_axi + - const: ufs_axi + +examples: + - | + #include + #include + #include + + bimc: interconnect@408000 { + compatible = "qcom,msm8996-bimc"; + reg = <0x00408000 0x5a000>; + #interconnect-cells = <1>; + }; + + a0noc: interconnect@543000 { + compatible = "qcom,msm8996-a0noc"; + reg = <0x00543000 0x6000>; + #interconnect-cells = <1>; + clocks = <&gcc GCC_AGGRE0_SNOC_AXI_CLK>, + <&gcc GCC_AGGRE0_CNOC_AHB_CLK>, + <&gcc GCC_AGGRE0_NOC_MPU_CFG_AHB_CLK>; + clock-names = "aggre0_snoc_axi", + "aggre0_cnoc_ahb", + "aggre0_noc_mpu_cfg"; + power-domains = <&gcc AGGRE0_NOC_GDSC>; + }; diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml index f65a2fe846..b6c15314c5 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,qcm2290.yaml @@ -13,6 +13,9 @@ description: | The Qualcomm QCM2290 interconnect providers support adjusting the bandwidth requirements between the various NoC fabrics. 
+allOf: + - $ref: qcom,rpm-common.yaml# + properties: reg: maxItems: 1 @@ -23,19 +26,6 @@ properties: - qcom,qcm2290-cnoc - qcom,qcm2290-snoc - '#interconnect-cells': - const: 1 - - clock-names: - items: - - const: bus - - const: bus_a - - clocks: - items: - - description: Bus Clock - - description: Bus A Clock - # Child node's properties patternProperties: '^interconnect-[a-z0-9]+$': @@ -44,6 +34,9 @@ patternProperties: The interconnect providers do not have a separate QoS register space, but share parent's space. + allOf: + - $ref: qcom,rpm-common.yaml# + properties: compatible: enum: @@ -51,35 +44,16 @@ patternProperties: - qcom,qcm2290-mmrt-virt - qcom,qcm2290-mmnrt-virt - '#interconnect-cells': - const: 1 - - clock-names: - items: - - const: bus - - const: bus_a - - clocks: - items: - - description: Bus Clock - - description: Bus A Clock - required: - compatible - - '#interconnect-cells' - - clock-names - - clocks - additionalProperties: false + unevaluatedProperties: false required: - compatible - reg - - '#interconnect-cells' - - clock-names - - clocks -additionalProperties: false +unevaluatedProperties: false examples: - | @@ -89,32 +63,20 @@ examples: compatible = "qcom,qcm2290-snoc"; reg = <0x01880000 0x60200>; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_SNOC_CLK>, - <&rpmcc RPM_SMD_SNOC_A_CLK>; qup_virt: interconnect-qup { compatible = "qcom,qcm2290-qup-virt"; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_QUP_CLK>, - <&rpmcc RPM_SMD_QUP_A_CLK>; }; mmnrt_virt: interconnect-mmnrt { compatible = "qcom,qcm2290-mmnrt-virt"; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_MMNRT_CLK>, - <&rpmcc RPM_SMD_MMNRT_A_CLK>; }; mmrt_virt: interconnect-mmrt { compatible = "qcom,qcm2290-mmrt-virt"; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_MMRT_CLK>, - <&rpmcc RPM_SMD_MMRT_A_CLK>; }; }; @@ -122,16 +84,10 @@ 
examples: compatible = "qcom,qcm2290-cnoc"; reg = <0x01900000 0x8200>; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_CNOC_CLK>, - <&rpmcc RPM_SMD_CNOC_A_CLK>; }; bimc: interconnect@4480000 { compatible = "qcom,qcm2290-bimc"; reg = <0x04480000 0x80000>; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_BIMC_CLK>, - <&rpmcc RPM_SMD_BIMC_A_CLK>; }; diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm-common.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm-common.yaml new file mode 100644 index 0000000000..1ea52b0916 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm-common.yaml @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,rpm-common.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect + +maintainers: + - Konrad Dybcio + +description: + RPM interconnect providers support for managing system bandwidth requirements + through manual requests based on either predefined values or as indicated by + the bus monitor hardware. Each provider node represents a NoC bus master, + driven by a dedicated clock source. 
+ +properties: + '#interconnect-cells': + oneOf: + - const: 2 + - const: 1 + deprecated: true + +required: + - '#interconnect-cells' + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml index 4f95d51201..08c1c6b9d7 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml @@ -7,13 +7,16 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm RPM Network-On-Chip Interconnect maintainers: - - Georgi Djakov + - Georgi Djakov description: | RPM interconnect providers support system bandwidth requirements through RPM processor. The provider is able to communicate with the RPM through the RPM shared memory device. +allOf: + - $ref: qcom,rpm-common.yaml# + properties: reg: maxItems: 1 @@ -23,259 +26,22 @@ properties: - qcom,msm8916-bimc - qcom,msm8916-pcnoc - qcom,msm8916-snoc - - qcom,msm8939-bimc - - qcom,msm8939-pcnoc - - qcom,msm8939-snoc - - qcom,msm8996-a0noc - - qcom,msm8996-a1noc - - qcom,msm8996-a2noc - - qcom,msm8996-bimc - - qcom,msm8996-cnoc - - qcom,msm8996-mnoc - - qcom,msm8996-pnoc - - qcom,msm8996-snoc - qcom,qcs404-bimc - qcom,qcs404-pcnoc - qcom,qcs404-snoc - - qcom,sdm660-a2noc - - qcom,sdm660-bimc - - qcom,sdm660-cnoc - - qcom,sdm660-gnoc - - qcom,sdm660-mnoc - - qcom,sdm660-snoc - - '#interconnect-cells': - description: | - Value: <1> is one cell in an interconnect specifier for the - interconnect node id, <2> requires the interconnect node id and an - extra path tag. - enum: [ 1, 2 ] - - clocks: - minItems: 2 - maxItems: 7 - - clock-names: - minItems: 2 - maxItems: 7 - - power-domains: - maxItems: 1 - -# Child node's properties -patternProperties: - '^interconnect-[a-z0-9]+$': - type: object - additionalProperties: false - description: - snoc-mm is a child of snoc, sharing snoc's register address space. 
- - properties: - compatible: - enum: - - qcom,msm8939-snoc-mm - - '#interconnect-cells': - const: 1 - - clock-names: - items: - - const: bus - - const: bus_a - - clocks: - items: - - description: Bus Clock - - description: Bus A Clock - - required: - - compatible - - '#interconnect-cells' - - clock-names - - clocks required: - compatible - reg - - '#interconnect-cells' - - clock-names - - clocks - -additionalProperties: false - -allOf: - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8916-bimc - - qcom,msm8916-pcnoc - - qcom,msm8916-snoc - - qcom,msm8939-bimc - - qcom,msm8939-pcnoc - - qcom,msm8939-snoc - - qcom,msm8996-a1noc - - qcom,msm8996-bimc - - qcom,msm8996-cnoc - - qcom,msm8996-pnoc - - qcom,msm8996-snoc - - qcom,qcs404-bimc - - qcom,qcs404-pcnoc - - qcom,qcs404-snoc - - qcom,sdm660-bimc - - qcom,sdm660-cnoc - - qcom,sdm660-gnoc - - qcom,sdm660-snoc - - then: - properties: - clock-names: - items: - - const: bus - - const: bus_a - - clocks: - items: - - description: Bus Clock - - description: Bus A Clock - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8996-mnoc - - qcom,sdm660-mnoc - - then: - properties: - clock-names: - items: - - const: bus - - const: bus_a - - const: iface - - clocks: - items: - - description: Bus Clock. - - description: Bus A Clock. - - description: CPU-NoC High-performance Bus Clock. - - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8996-a0noc - - then: - properties: - clock-names: - items: - - const: aggre0_snoc_axi - - const: aggre0_cnoc_ahb - - const: aggre0_noc_mpu_cfg - - clocks: - items: - - description: Aggregate0 System NoC AXI Clock. - - description: Aggregate0 Config NoC AHB Clock. - - description: Aggregate0 NoC MPU Clock. 
- - required: - - power-domains - - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8996-a2noc - - then: - properties: - clock-names: - items: - - const: bus - - const: bus_a - - const: aggre2_ufs_axi - - const: ufs_axi - - clocks: - items: - - description: Bus Clock - - description: Bus A Clock - - description: Aggregate2 NoC UFS AXI Clock - - description: UFS AXI Clock - - - if: - properties: - compatible: - contains: - enum: - - qcom,sdm660-a2noc - - then: - properties: - clock-names: - items: - - const: bus - - const: bus_a - - const: ipa - - const: ufs_axi - - const: aggre2_ufs_axi - - const: aggre2_usb3_axi - - const: cfg_noc_usb2_axi - - clocks: - items: - - description: Bus Clock. - - description: Bus A Clock. - - description: IPA Clock. - - description: UFS AXI Clock. - - description: Aggregate2 UFS AXI Clock. - - description: Aggregate2 USB3 AXI Clock. - - description: Config NoC USB2 AXI Clock. - - - if: - not: - properties: - compatible: - contains: - enum: - - qcom,msm8939-snoc - then: - patternProperties: - '^interconnect-[a-z0-9]+$': false +unevaluatedProperties: false examples: - | #include bimc: interconnect@400000 { - compatible = "qcom,msm8916-bimc"; - reg = <0x00400000 0x62000>; - #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_BIMC_CLK>, - <&rpmcc RPM_SMD_BIMC_A_CLK>; - }; - - pcnoc: interconnect@500000 { - compatible = "qcom,msm8916-pcnoc"; - reg = <0x00500000 0x11000>; - #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_PCNOC_CLK>, - <&rpmcc RPM_SMD_PCNOC_A_CLK>; - }; - - snoc: interconnect@580000 { - compatible = "qcom,msm8916-snoc"; - reg = <0x00580000 0x14000>; - #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; - clocks = <&rpmcc RPM_SMD_SNOC_CLK>, - <&rpmcc RPM_SMD_SNOC_A_CLK>; + compatible = "qcom,msm8916-bimc"; + reg = <0x00400000 0x62000>; + #interconnect-cells = <1>; }; diff --git 
a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml index a46497af1f..74ab080249 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml @@ -113,6 +113,7 @@ allOf: properties: compatible: enum: + - qcom,sdx65-mc-virt - qcom,sm8250-qup-virt then: required: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml new file mode 100644 index 0000000000..8f6bc63996 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sdm660.yaml @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sdm660.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SDM660 Network-On-Chip interconnect + +maintainers: + - Konrad Dybcio + +description: | + The Qualcomm SDM660 interconnect providers support adjusting the + bandwidth requirements between the various NoC fabrics. + +properties: + compatible: + enum: + - qcom,sdm660-a2noc + - qcom,sdm660-bimc + - qcom,sdm660-cnoc + - qcom,sdm660-gnoc + - qcom,sdm660-mnoc + - qcom,sdm660-snoc + + reg: + maxItems: 1 + + clock-names: + minItems: 1 + maxItems: 5 + + clocks: + minItems: 1 + maxItems: 5 + +required: + - compatible + - reg + +unevaluatedProperties: false + +allOf: + - $ref: qcom,rpm-common.yaml# + - if: + properties: + compatible: + const: qcom,sdm660-mnoc + + then: + properties: + clocks: + items: + - description: CPU-NoC High-performance Bus Clock. + + clock-names: + const: iface + + - if: + properties: + compatible: + const: qcom,sdm660-a2noc + + then: + properties: + clocks: + items: + - description: IPA Clock. + - description: UFS AXI Clock. + - description: Aggregate2 UFS AXI Clock. + - description: Aggregate2 USB3 AXI Clock. 
+ - description: Config NoC USB2 AXI Clock. + + clock-names: + items: + - const: ipa + - const: ufs_axi + - const: aggre2_ufs_axi + - const: aggre2_usb3_axi + - const: cfg_noc_usb2_axi + +examples: + - | + #include + #include + #include + + bimc: interconnect@1008000 { + compatible = "qcom,sdm660-bimc"; + reg = <0x01008000 0x78000>; + #interconnect-cells = <1>; + }; + + a2noc: interconnect@1704000 { + compatible = "qcom,sdm660-a2noc"; + reg = <0x01704000 0xc100>; + #interconnect-cells = <1>; + clocks = <&rpmcc RPM_SMD_IPA_CLK>, + <&gcc GCC_UFS_AXI_CLK>, + <&gcc GCC_AGGRE2_UFS_AXI_CLK>, + <&gcc GCC_AGGRE2_USB3_AXI_CLK>, + <&gcc GCC_CFG_NOC_USB2_AXI_CLK>; + clock-names = "ipa", + "ufs_axi", + "aggre2_ufs_axi", + "aggre2_usb3_axi", + "cfg_noc_usb2_axi"; + }; diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml new file mode 100644 index 0000000000..71cf7e252b --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sdx75-rpmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect on SDX75 + +maintainers: + - Rohit Agarwal + +description: + RPMh interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. 
+ +properties: + compatible: + enum: + - qcom,sdx75-clk-virt + - qcom,sdx75-dc-noc + - qcom,sdx75-gem-noc + - qcom,sdx75-mc-virt + - qcom,sdx75-pcie-anoc + - qcom,sdx75-system-noc + + '#interconnect-cells': true + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + +allOf: + - $ref: qcom,rpmh-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - qcom,sdx75-clk-virt + - qcom,sdx75-mc-virt + then: + properties: + reg: false + else: + required: + - reg + + - if: + properties: + compatible: + contains: + enum: + - qcom,sdx75-clk-virt + then: + properties: + clocks: + items: + - description: RPMH CC QPIC Clock + required: + - clocks + else: + properties: + clocks: false + +unevaluatedProperties: false + +examples: + - | + #include + + clk_virt: interconnect-0 { + compatible = "qcom,sdx75-clk-virt"; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + clocks = <&rpmhcc RPMH_QPIC_CLK>; + }; + + system_noc: interconnect@1640000 { + compatible = "qcom,sdx75-system-noc"; + reg = <0x1640000 0x4b400>; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml index 4847b04be1..86d61896f5 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml @@ -35,6 +35,7 @@ properties: - qcom,sdm845-pdc - qcom,sdx55-pdc - qcom,sdx65-pdc + - qcom,sm4450-pdc - qcom,sm6350-pdc - qcom,sm8150-pdc - qcom,sm8250-pdc diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml index dc1f28e552..0c07e8dda4 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml +++ 
b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml @@ -65,6 +65,8 @@ properties: - items: - enum: - allwinner,sun20i-d1-plic + - sophgo,cv1800b-plic + - sophgo,sg2042-plic - thead,th1520-plic - const: thead,c900-plic - items: diff --git a/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml b/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml new file mode 100644 index 0000000000..065f2544b6 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/thead,c900-aclint-mswi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo sg2042 CLINT Machine-level Software Interrupt Device + +maintainers: + - Inochi Amaoto + +properties: + compatible: + items: + - enum: + - sophgo,sg2042-aclint-mswi + - const: thead,c900-aclint-mswi + + reg: + maxItems: 1 + + interrupts-extended: + minItems: 1 + maxItems: 4095 + +additionalProperties: false + +required: + - compatible + - reg + - interrupts-extended + +examples: + - | + interrupt-controller@94000000 { + compatible = "sophgo,sg2042-aclint-mswi", "thead,c900-aclint-mswi"; + interrupts-extended = <&cpu1intc 3>, + <&cpu2intc 3>, + <&cpu3intc 3>, + <&cpu4intc 3>; + reg = <0x94000000 0x00010000>; + }; +... 
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b1b2cf81b4..aa9e1c0895 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -110,6 +110,7 @@ properties: - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -409,6 +410,7 @@ allOf: contains: enum: - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: diff --git a/Documentation/devicetree/bindings/leds/backlight/common.yaml b/Documentation/devicetree/bindings/leds/backlight/common.yaml index 3b60afbab6..e0983e4493 100644 --- a/Documentation/devicetree/bindings/leds/backlight/common.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/common.yaml @@ -33,4 +33,21 @@ properties: due to restrictions in a specific system, such as mounting conditions. $ref: /schemas/types.yaml#/definitions/uint32 + brightness-levels: + description: + Array of distinct brightness levels. The levels must be in the range + accepted by the underlying LED device. Typically these are in the range + from 0 to 255, but any range starting at 0 will do, as long as they are + accepted by the LED. + The 0 value means a 0% of brightness (darkest/off), while the last value + in the array represents a full 100% brightness (brightest). + If this array is not provided, the driver default mapping is used. + $ref: /schemas/types.yaml#/definitions/uint32-array + + default-brightness-level: + description: + The default brightness level (index into the array defined by the + "brightness-levels" property). 
+ $ref: /schemas/types.yaml#/definitions/uint32 + additionalProperties: true diff --git a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml index d7b78198ab..f5554da6bc 100644 --- a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml @@ -16,6 +16,9 @@ description: can also be used to describe a backlight device controlled by the output of a LED driver. +allOf: + - $ref: common.yaml# + properties: compatible: const: led-backlight @@ -26,25 +29,11 @@ properties: items: maxItems: 1 - brightness-levels: - description: - Array of distinct brightness levels. The levels must be in the range - accepted by the underlying LED devices. This is used to translate a - backlight brightness level into a LED brightness level. If it is not - provided, the identity mapping is used. - $ref: /schemas/types.yaml#/definitions/uint32-array - - default-brightness-level: - description: - The default brightness level (index into the array defined by the - "brightness-levels" property). 
- $ref: /schemas/types.yaml#/definitions/uint32 - required: - compatible - leds -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/leds/backlight/max8925-backlight.txt b/Documentation/devicetree/bindings/leds/backlight/max8925-backlight.txt deleted file mode 100644 index b4cffdaa41..0000000000 --- a/Documentation/devicetree/bindings/leds/backlight/max8925-backlight.txt +++ /dev/null @@ -1,10 +0,0 @@ -88pm860x-backlight bindings - -Optional properties: - - maxim,max8925-dual-string: whether support dual string - -Example: - - backlights { - maxim,max8925-dual-string = <0>; - }; diff --git a/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml b/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml new file mode 100644 index 0000000000..4191e33626 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/leds/backlight/mps,mp3309c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MPS MP3309C backlight + +maintainers: + - Flavio Suligoi + +description: | + The Monolithic Power (MPS) MP3309C is a WLED step-up converter, featuring a + programmable switching frequency to optimize efficiency. + It supports two different dimming modes: + + - analog mode, via I2C commands (default) + - PWM controlled mode. + + The datasheet is available at: + https://www.monolithicpower.com/en/mp3309c.html + +allOf: + - $ref: common.yaml# + +properties: + compatible: + const: mps,mp3309c + + reg: + maxItems: 1 + + pwms: + description: if present, the backlight is controlled in PWM mode. + maxItems: 1 + + enable-gpios: + description: GPIO used to enable the backlight in "analog-i2c" dimming mode. + maxItems: 1 + + mps,overvoltage-protection-microvolt: + description: Overvoltage protection (13.5V, 24V or 35.5V). 
+ enum: [ 13500000, 24000000, 35500000 ] + default: 35500000 + + mps,no-sync-mode: + description: disable synchronous rectification mode + type: boolean + +required: + - compatible + - reg + - max-brightness + - default-brightness + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + /* Backlight with PWM control */ + backlight_pwm: backlight@17 { + compatible = "mps,mp3309c"; + reg = <0x17>; + pwms = <&pwm1 0 3333333 0>; /* 300 Hz --> (1/f) * 1*10^9 */ + max-brightness = <100>; + default-brightness = <80>; + mps,overvoltage-protection-microvolt = <24000000>; + }; + }; diff --git a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml index 5356902889..b71f6454a4 100644 --- a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml +++ b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml @@ -11,6 +11,9 @@ maintainers: - Daniel Thompson - Jingoo Han +allOf: + - $ref: common.yaml# + properties: compatible: const: pwm-backlight @@ -39,21 +42,6 @@ properties: Delay in ms between disabling the backlight using GPIO and setting PWM value to 0. - brightness-levels: - description: - Array of distinct brightness levels. Typically these are in the range - from 0 to 255, but any range starting at 0 will do. The actual brightness - level (PWM duty cycle) will be interpolated from these values. 0 means a - 0% duty cycle (darkest/off), while the last value in the array represents - a 100% duty cycle (brightest). - $ref: /schemas/types.yaml#/definitions/uint32-array - - default-brightness-level: - description: - The default brightness level (index into the array defined by the - "brightness-levels" property). 
- $ref: /schemas/types.yaml#/definitions/uint32 - num-interpolated-steps: description: Number of interpolated steps between each value of brightness-levels @@ -69,7 +57,7 @@ required: - compatible - pwms -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml index 5fb7007f36..c8d0ba5f23 100644 --- a/Documentation/devicetree/bindings/leds/common.yaml +++ b/Documentation/devicetree/bindings/leds/common.yaml @@ -43,7 +43,7 @@ properties: LED_COLOR_ID available, add a new one. $ref: /schemas/types.yaml#/definitions/uint32 minimum: 0 - maximum: 9 + maximum: 14 function-enumerator: description: @@ -191,6 +191,8 @@ properties: each of them having its own LED assigned (assuming they are not hardwired). In such cases this property should contain phandle(s) of related source device(s). + Another example is a GPIO line that will be monitored and mirror the + state of the line (with or without inversion flags) to the LED. In many cases LED can be related to more than one device (e.g. one USB LED vs. multiple USB ports). 
Each source should be represented by a node in the device tree and be referenced by a phandle and a set of phandle diff --git a/Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml b/Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml index f2a6fa140f..7526e3149f 100644 --- a/Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml +++ b/Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml @@ -15,7 +15,10 @@ description: properties: compatible: - const: pwm-ir-tx + oneOf: + - const: pwm-ir-tx + - const: nokia,n900-ir + deprecated: true pwms: maxItems: 1 diff --git a/Documentation/devicetree/bindings/leds/kinetic,ktd202x.yaml b/Documentation/devicetree/bindings/leds/kinetic,ktd202x.yaml new file mode 100644 index 0000000000..832c030a5a --- /dev/null +++ b/Documentation/devicetree/bindings/leds/kinetic,ktd202x.yaml @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/leds/kinetic,ktd202x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Kinetic KTD2026/7 RGB/White LED Driver + +maintainers: + - André Apitzsch + +description: | + The KTD2026/7 is a RGB/White LED driver with I2C interface. + + The data sheet can be found at: + https://www.kinet-ic.com/uploads/KTD2026-7-04h.pdf + +properties: + compatible: + enum: + - kinetic,ktd2026 + - kinetic,ktd2027 + + reg: + maxItems: 1 + + vin-supply: + description: Regulator providing power to the "VIN" pin. + + vio-supply: + description: Regulator providing power for pull-up of the I/O lines. + Note that this regulator does not directly connect to KTD2026, but is + needed for the correct operation of the status ("ST") and I2C lines. 
+ + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + multi-led: + type: object + $ref: leds-class-multicolor.yaml# + unevaluatedProperties: false + + properties: + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + patternProperties: + "^led@[0-3]$": + type: object + $ref: common.yaml# + unevaluatedProperties: false + + properties: + reg: + description: Index of the LED. + minimum: 0 + maximum: 3 + + required: + - reg + - color + + required: + - "#address-cells" + - "#size-cells" + +patternProperties: + "^led@[0-3]$": + type: object + $ref: common.yaml# + unevaluatedProperties: false + + properties: + reg: + description: Index of the LED. + minimum: 0 + maximum: 3 + + required: + - reg + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + +additionalProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + led-controller@30 { + compatible = "kinetic,ktd2026"; + reg = <0x30>; + #address-cells = <1>; + #size-cells = <0>; + + vin-supply = <&pm8916_l17>; + vio-supply = <&pm8916_l6>; + + led@0 { + reg = <0>; + function = LED_FUNCTION_STATUS; + color = ; + }; + + led@1 { + reg = <1>; + function = LED_FUNCTION_STATUS; + color = ; + }; + + led@2 { + reg = <2>; + function = LED_FUNCTION_STATUS; + color = ; + }; + }; + }; + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + led-controller@30 { + compatible = "kinetic,ktd2026"; + reg = <0x30>; + #address-cells = <1>; + #size-cells = <0>; + + vin-supply = <&pm8916_l17>; + vio-supply = <&pm8916_l6>; + + multi-led { + color = ; + function = LED_FUNCTION_STATUS; + + #address-cells = <1>; + #size-cells = <0>; + + led@0 { + reg = <0>; + color = ; + }; + + led@1 { + reg = <1>; + color = ; + }; + + led@2 { + reg = <2>; + color = ; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.yaml b/Documentation/devicetree/bindings/leds/register-bit-led.yaml index ed26ec19ec..20930d327a 
100644 --- a/Documentation/devicetree/bindings/leds/register-bit-led.yaml +++ b/Documentation/devicetree/bindings/leds/register-bit-led.yaml @@ -60,7 +60,7 @@ examples: - | syscon@10000000 { - compatible = "arm,realview-pb1176-syscon", "syscon"; + compatible = "arm,realview-pb1176-syscon", "syscon", "simple-mfd"; reg = <0x10000000 0x1000>; #address-cells = <1>; #size-cells = <1>; diff --git a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml index 191c1ce150..12e7a7d536 100644 --- a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml +++ b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml @@ -72,9 +72,9 @@ properties: type : Channel type channel : Channel number - This MU support 5 type of unidirectional channels, each type + This MU support 6 type of unidirectional channels, each type has 4 channels except RST channel which only has 1 channel. - A total of 17 channels. Following types are + A total of 21 channels. Following types are supported: 0 - TX channel with 32bit transmit register and IRQ transmit acknowledgment support. @@ -82,6 +82,7 @@ properties: 2 - TX doorbell channel. Without own register and no ACK support. 3 - RX doorbell channel. 4 - RST channel + 5 - Tx doorbell channel. With S/W ACK from the other side. 
const: 2 clocks: diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml index d2e25ff6db..a38413f8d1 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml @@ -125,10 +125,12 @@ allOf: items: - description: primary pll parent of the clock driver - description: XO clock + - description: GCC GPLL0 clock source clock-names: items: - const: pll - const: xo + - const: gpll0 - if: properties: diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index cc6f66eccc..a35f9483dc 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -34,6 +34,7 @@ properties: - qcom,sm8350-ipcc - qcom,sm8450-ipcc - qcom,sm8550-ipcc + - qcom,sm8650-ipcc - const: qcom,ipcc reg: diff --git a/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml index aeaddbf574..8b15a05321 100644 --- a/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml @@ -74,6 +74,10 @@ patternProperties: type: object # DT nodes are json objects additionalProperties: false properties: + + compatible: + const: xlnx,zynqmp-ipi-dest-mailbox + xlnx,ipi-id: description: Remote Xilinx IPI agent ID of which the mailbox is connected to. 
@@ -95,6 +99,7 @@ patternProperties: - const: remote_response_region required: + - compatible - reg - reg-names - "#mbox-cells" @@ -124,6 +129,7 @@ examples: ranges; mailbox: mailbox@ff9905c0 { + compatible = "xlnx,zynqmp-ipi-dest-mailbox"; reg = <0x0 0xff9905c0 0x0 0x20>, <0x0 0xff9905e0 0x0 0x20>, <0x0 0xff990e80 0x0 0x20>, diff --git a/Documentation/devicetree/bindings/media/amlogic,meson6-ir.yaml b/Documentation/devicetree/bindings/media/amlogic,meson6-ir.yaml index 3f9fa92703..0f95fe8dd9 100644 --- a/Documentation/devicetree/bindings/media/amlogic,meson6-ir.yaml +++ b/Documentation/devicetree/bindings/media/amlogic,meson6-ir.yaml @@ -19,6 +19,7 @@ properties: - amlogic,meson6-ir - amlogic,meson8b-ir - amlogic,meson-gxbb-ir + - amlogic,meson-s4-ir - items: - const: amlogic,meson-gx-ir - const: amlogic,meson-gxbb-ir diff --git a/Documentation/devicetree/bindings/media/cdns,csi2rx.yaml b/Documentation/devicetree/bindings/media/cdns,csi2rx.yaml index 30a335b107..2008a47c05 100644 --- a/Documentation/devicetree/bindings/media/cdns,csi2rx.yaml +++ b/Documentation/devicetree/bindings/media/cdns,csi2rx.yaml @@ -18,6 +18,7 @@ properties: items: - enum: - starfive,jh7110-csi2rx + - ti,j721e-csi2rx - const: cdns,csi2rx reg: diff --git a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml index 1e2df8cf29..60f19e1152 100644 --- a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml +++ b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml @@ -14,6 +14,9 @@ description: |- interface and CCI (I2C compatible) control bus. The output format is raw Bayer. 
+allOf: + - $ref: /schemas/media/video-interface-devices.yaml# + properties: compatible: const: hynix,hi846 @@ -86,7 +89,7 @@ required: - vddd-supply - port -additionalProperties: false +unevaluatedProperties: false examples: - | @@ -109,6 +112,8 @@ examples: vddio-supply = <&reg_camera_vddio>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; shutdown-gpios = <&gpio5 4 GPIO_ACTIVE_LOW>; + orientation = <0>; + rotation = <0>; port { camera_out: endpoint { diff --git a/Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml b/Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml new file mode 100644 index 0000000000..f6b8789206 --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/i2c/onnn,mt9m114.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: onsemi 1/6-inch 720p CMOS Digital Image Sensor + +maintainers: + - Laurent Pinchart + +description: |- + The onsemi MT9M114 is a 1/6-inch 720p (1.26 Mp) CMOS digital image sensor + with an active pixel-array size of 1296H x 976V. It is programmable through + an I2C interface and outputs image data over a 8-bit parallel or 1-lane MIPI + CSI-2 connection. + +properties: + compatible: + const: onnn,mt9m114 + + reg: + description: I2C device address + enum: + - 0x48 + - 0x5d + + clocks: + description: EXTCLK clock signal + maxItems: 1 + + vdd-supply: + description: + Core digital voltage supply, 1.8V + + vddio-supply: + description: + I/O digital voltage supply, 1.8V or 2.8V + + vaa-supply: + description: + Analog voltage supply, 2.8V + + reset-gpios: + description: |- + Reference to the GPIO connected to the RESET_BAR pin, if any (active + low).
+ + port: + $ref: /schemas/graph.yaml#/$defs/port-base + additionalProperties: false + + properties: + endpoint: + $ref: /schemas/media/video-interfaces.yaml# + additionalProperties: false + + properties: + bus-type: + enum: [4, 5, 6] + + link-frequencies: true + remote-endpoint: true + + # The number and mapping of lanes (for CSI-2), and the bus width and + # signal polarities (for parallel and BT.656) are fixed and must not + # be specified. + + required: + - bus-type + - link-frequencies + +required: + - compatible + - reg + - clocks + - vdd-supply + - vddio-supply + - vaa-supply + - port + +additionalProperties: false + +examples: + - | + #include + #include + + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + + sensor@48 { + compatible = "onnn,mt9m114"; + reg = <0x48>; + + clocks = <&clk24m 0>; + + reset-gpios = <&gpio5 21 GPIO_ACTIVE_LOW>; + + vddio-supply = <&reg_cam_1v8>; + vdd-supply = <&reg_cam_1v8>; + vaa-supply = <&reg_2p8v>; + + port { + endpoint { + bus-type = ; + link-frequencies = /bits/ 64 <384000000>; + remote-endpoint = <&mipi_csi_in>; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml index 763cebe03d..67c1c29132 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml @@ -68,12 +68,6 @@ properties: marked GPIO_ACTIVE_LOW.
maxItems: 1 - rotation: - enum: - - 0 # Sensor Mounted Upright - - 180 # Sensor Mounted Upside Down - default: 0 - port: $ref: /schemas/graph.yaml#/$defs/port-base additionalProperties: false @@ -114,7 +108,7 @@ required: - reset-gpios - port -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml index 50579c947f..d96199031b 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov4689.yaml @@ -52,10 +52,6 @@ properties: description: GPIO connected to the reset pin (active low) - orientation: true - - rotation: true - port: $ref: /schemas/graph.yaml#/$defs/port-base additionalProperties: false @@ -95,7 +91,7 @@ required: - dvdd-supply - port -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml index a621032f9b..2c5e693566 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml @@ -44,11 +44,6 @@ properties: description: > Reference to the GPIO connected to the reset pin, if any. 
- rotation: - enum: - - 0 - - 180 - port: description: Digital Output Port $ref: /schemas/graph.yaml#/$defs/port-base @@ -85,7 +80,7 @@ required: - DOVDD-supply - port -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5642.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5642.yaml new file mode 100644 index 0000000000..01f8b2b3fd --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5642.yaml @@ -0,0 +1,141 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/i2c/ovti,ov5642.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: OmniVision OV5642 Image Sensor + +maintainers: + - Fabio Estevam + +allOf: + - $ref: /schemas/media/video-interface-devices.yaml# + +properties: + compatible: + const: ovti,ov5642 + + reg: + maxItems: 1 + + clocks: + description: XCLK Input Clock + + AVDD-supply: + description: Analog voltage supply, 2.8V. + + DVDD-supply: + description: Digital core voltage supply, 1.5V. + + DOVDD-supply: + description: Digital I/O voltage supply, 1.8V. + + powerdown-gpios: + maxItems: 1 + description: Reference to the GPIO connected to the powerdown pin, if any. + + reset-gpios: + maxItems: 1 + description: Reference to the GPIO connected to the reset pin, if any. + + port: + $ref: /schemas/graph.yaml#/$defs/port-base + description: | + Video output port. 
+ + properties: + endpoint: + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + + properties: + bus-type: + enum: [5, 6] + + bus-width: + enum: [8, 10] + default: 10 + + data-shift: + enum: [0, 2] + default: 0 + + hsync-active: + enum: [0, 1] + default: 1 + + vsync-active: + enum: [0, 1] + default: 1 + + pclk-sample: + enum: [0, 1] + default: 1 + + allOf: + - if: + properties: + bus-type: + const: 6 + then: + properties: + hsync-active: false + vsync-active: false + + - if: + properties: + bus-width: + const: 10 + then: + properties: + data-shift: + const: 0 + + required: + - bus-type + + additionalProperties: false + +required: + - compatible + - reg + - clocks + - port + +additionalProperties: false + +examples: + - | + #include + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + camera@3c { + compatible = "ovti,ov5642"; + reg = <0x3c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ov5642>; + clocks = <&clk_ext_camera>; + DOVDD-supply = <&vgen4_reg>; + AVDD-supply = <&vgen3_reg>; + DVDD-supply = <&vgen2_reg>; + powerdown-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio1 20 GPIO_ACTIVE_LOW>; + + port { + ov5642_to_parallel: endpoint { + bus-type = ; + remote-endpoint = <&parallel_from_ov5642>; + bus-width = <8>; + data-shift = <2>; /* lines 9:2 are used */ + hsync-active = <0>; + vsync-active = <0>; + pclk-sample = <1>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml index 6829a4aadd..3368b3bd8e 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Omnivision OV5693/OV5695 CMOS Sensors maintainers: - - Tommaso Merciai + - Tommaso Merciai description: | The Omnivision OV5693/OV5695 are high performance, 1/4-inch, 5 megapixel, CMOS diff
--git a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml index e2470dd592..60903da84e 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml @@ -91,7 +91,7 @@ required: - vddd-supply - port -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml index 642f9b15d3..9a00dab2e8 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml @@ -44,14 +44,6 @@ properties: description: Sensor reset (XCLR) GPIO maxItems: 1 - flash-leds: true - - lens-focus: true - - orientation: true - - rotation: true - port: $ref: /schemas/graph.yaml#/$defs/port-base unevaluatedProperties: false @@ -89,7 +81,7 @@ required: - ovdd-supply - port -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml b/Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml index 289737721c..0b71e6f911 100644 --- a/Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml @@ -69,6 +69,7 @@ properties: maxItems: 1 i2c-alias: + $ref: /schemas/types.yaml#/definitions/uint32 description: The I2C address used for the serializer. 
Transactions to this address on the I2C bus where the deserializer resides are diff --git a/Documentation/devicetree/bindings/media/nokia,n900-ir b/Documentation/devicetree/bindings/media/nokia,n900-ir deleted file mode 100644 index 13a18ce37d..0000000000 --- a/Documentation/devicetree/bindings/media/nokia,n900-ir +++ /dev/null @@ -1,20 +0,0 @@ -Device-Tree bindings for LIRC TX driver for Nokia N900(RX51) - -Required properties: - - compatible: should be "nokia,n900-ir". - - pwms: specifies PWM used for IR signal transmission. - -Example node: - - pwm9: dmtimer-pwm@9 { - compatible = "ti,omap-dmtimer-pwm"; - ti,timers = <&timer9>; - ti,clock-source = <0x00>; /* timer_sys_ck */ - #pwm-cells = <3>; - }; - - ir: n900-ir { - compatible = "nokia,n900-ir"; - - pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ - }; diff --git a/Documentation/devicetree/bindings/media/nuvoton,npcm-ece.yaml b/Documentation/devicetree/bindings/media/nuvoton,npcm-ece.yaml new file mode 100644 index 0000000000..b47468e545 --- /dev/null +++ b/Documentation/devicetree/bindings/media/nuvoton,npcm-ece.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/nuvoton,npcm-ece.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton NPCM Encoding Compression Engine + +maintainers: + - Joseph Liu + - Marvin Lin + +description: | + Video Encoding Compression Engine (ECE) present on Nuvoton NPCM SoCs. 
+ +properties: + compatible: + enum: + - nuvoton,npcm750-ece + - nuvoton,npcm845-ece + + reg: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - resets + +additionalProperties: false + +examples: + - | + #include + + ece: video-codec@f0820000 { + compatible = "nuvoton,npcm750-ece"; + reg = <0xf0820000 0x2000>; + resets = <&rstc NPCM7XX_RESET_IPSRST2 NPCM7XX_RESET_ECE>; + }; diff --git a/Documentation/devicetree/bindings/media/nuvoton,npcm-vcd.yaml b/Documentation/devicetree/bindings/media/nuvoton,npcm-vcd.yaml new file mode 100644 index 0000000000..c885f559d2 --- /dev/null +++ b/Documentation/devicetree/bindings/media/nuvoton,npcm-vcd.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/nuvoton,npcm-vcd.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton NPCM Video Capture/Differentiation Engine + +maintainers: + - Joseph Liu + - Marvin Lin + +description: | + Video Capture/Differentiation Engine (VCD) present on Nuvoton NPCM SoCs. + +properties: + compatible: + enum: + - nuvoton,npcm750-vcd + - nuvoton,npcm845-vcd + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + resets: + maxItems: 1 + + nuvoton,sysgcr: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to access GCR (Global Control Register) registers. + + nuvoton,sysgfxi: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to access GFXI (Graphics Core Information) registers. + + nuvoton,ece: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to access ECE (Encoding Compression Engine) registers. + + memory-region: + maxItems: 1 + description: + CMA pool to use for buffers allocation instead of the default CMA pool. 
+ +required: + - compatible + - reg + - interrupts + - resets + - nuvoton,sysgcr + - nuvoton,sysgfxi + - nuvoton,ece + +additionalProperties: false + +examples: + - | + #include + #include + + vcd: vcd@f0810000 { + compatible = "nuvoton,npcm750-vcd"; + reg = <0xf0810000 0x10000>; + interrupts = ; + resets = <&rstc NPCM7XX_RESET_IPSRST2 NPCM7XX_RESET_VCD>; + nuvoton,sysgcr = <&gcr>; + nuvoton,sysgfxi = <&gfxi>; + nuvoton,ece = <&ece>; + }; diff --git a/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml index d5f80976f4..6228fd2b32 100644 --- a/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml +++ b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml @@ -48,6 +48,14 @@ properties: iommus: maxItems: 2 + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: video-mem + - const: cpu-cfg + operating-points-v2: true opp-table: type: object diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml index 772ec3283b..c57e1f4888 100644 --- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml +++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml @@ -68,6 +68,13 @@ properties: iommus: maxItems: 1 + resets: + items: + - description: AXI reset line + - description: AXI bus interface unit reset line + - description: APB reset line + - description: APB bus interface unit reset line + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml b/Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml index 3691cd4962..3a5ff3f470 100644 --- a/Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml +++ b/Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml @@ -75,13 +75,20 @@ properties: power-domains: maxItems: 1 + samsung,pmu-syscon: + $ref: 
/schemas/types.yaml#/definitions/phandle + description: + Power Management Unit (PMU) system controller interface, used to + power/start the ISP. + patternProperties: "^pmu@[0-9a-f]+$": type: object additionalProperties: false + deprecated: true description: Node representing the SoC's Power Management Unit (duplicated with the - correct PMU node in the SoC). + correct PMU node in the SoC). Deprecated, use samsung,pmu-syscon. properties: reg: @@ -131,6 +138,7 @@ required: - clock-names - interrupts - ranges + - samsung,pmu-syscon - '#size-cells' additionalProperties: false @@ -179,15 +187,12 @@ examples: <&sysmmu_fimc_fd>, <&sysmmu_fimc_mcuctl>; iommu-names = "isp", "drc", "fd", "mcuctl"; power-domains = <&pd_isp>; + samsung,pmu-syscon = <&pmu_system_controller>; #address-cells = <1>; #size-cells = <1>; ranges; - pmu@10020000 { - reg = <0x10020000 0x3000>; - }; - i2c-isp@12140000 { compatible = "samsung,exynos4212-i2c-isp"; reg = <0x12140000 0x100>; diff --git a/Documentation/devicetree/bindings/media/samsung,fimc.yaml b/Documentation/devicetree/bindings/media/samsung,fimc.yaml index b3486c38a0..7808d61f1f 100644 --- a/Documentation/devicetree/bindings/media/samsung,fimc.yaml +++ b/Documentation/devicetree/bindings/media/samsung,fimc.yaml @@ -118,7 +118,7 @@ examples: #clock-cells = <1>; #address-cells = <1>; #size-cells = <1>; - ranges = <0x0 0x0 0x18000000>; + ranges = <0x0 0x0 0xba1000>; clocks = <&clock CLK_SCLK_CAM0>, <&clock CLK_SCLK_CAM1>, <&clock CLK_PIXELASYNCM0>, <&clock CLK_PIXELASYNCM1>; @@ -133,9 +133,9 @@ examples: pinctrl-0 = <&cam_port_a_clk_active &cam_port_b_clk_active>; pinctrl-names = "default"; - fimc@11800000 { + fimc@0 { compatible = "samsung,exynos4212-fimc"; - reg = <0x11800000 0x1000>; + reg = <0x00000000 0x1000>; interrupts = ; clocks = <&clock CLK_FIMC0>, <&clock CLK_SCLK_FIMC0>; @@ -152,9 +152,9 @@ examples: /* ... 
FIMC 1-3 */ - csis@11880000 { + csis@80000 { compatible = "samsung,exynos4210-csis"; - reg = <0x11880000 0x4000>; + reg = <0x00080000 0x4000>; interrupts = ; clocks = <&clock CLK_CSIS0>, <&clock CLK_SCLK_CSIS0>; @@ -187,9 +187,9 @@ examples: /* ... CSIS 1 */ - fimc-lite@12390000 { + fimc-lite@b90000 { compatible = "samsung,exynos4212-fimc-lite"; - reg = <0x12390000 0x1000>; + reg = <0xb90000 0x1000>; interrupts = ; power-domains = <&pd_isp>; clocks = <&isp_clock CLK_ISP_FIMC_LITE0>; @@ -199,9 +199,9 @@ examples: /* ... FIMC-LITE 1 */ - fimc-is@12000000 { + fimc-is@800000 { compatible = "samsung,exynos4212-fimc-is"; - reg = <0x12000000 0x260000>; + reg = <0x00800000 0x260000>; interrupts = , ; clocks = <&isp_clock CLK_ISP_FIMC_LITE0>, @@ -237,18 +237,15 @@ examples: <&sysmmu_fimc_fd>, <&sysmmu_fimc_mcuctl>; iommu-names = "isp", "drc", "fd", "mcuctl"; power-domains = <&pd_isp>; + samsung,pmu-syscon = <&pmu_system_controller>; #address-cells = <1>; #size-cells = <1>; ranges; - pmu@10020000 { - reg = <0x10020000 0x3000>; - }; - - i2c-isp@12140000 { + i2c-isp@940000 { compatible = "samsung,exynos4212-i2c-isp"; - reg = <0x12140000 0x100>; + reg = <0x00940000 0x100>; clocks = <&isp_clock CLK_ISP_I2C1_ISP>; clock-names = "i2c_isp"; pinctrl-0 = <&fimc_is_i2c1>; diff --git a/Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml b/Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml new file mode 100644 index 0000000000..f762fdc05e --- /dev/null +++ b/Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml @@ -0,0 +1,100 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/ti,j721e-csi2rx-shim.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TI J721E CSI2RX Shim + +description: | + The TI J721E CSI2RX Shim is a wrapper around Cadence CSI2RX bridge that + enables sending captured frames to memory over PSI-L DMA. 
In the J721E + Technical Reference Manual (SPRUIL1B) it is referred to as "SHIM" under the + CSI_RX_IF section. + +maintainers: + - Jai Luthra + +properties: + compatible: + const: ti,j721e-csi2rx-shim + + dmas: + maxItems: 1 + + dma-names: + items: + - const: rx0 + + reg: + maxItems: 1 + + power-domains: + maxItems: 1 + + ranges: true + + "#address-cells": true + + "#size-cells": true + +patternProperties: + "^csi-bridge@": + type: object + description: CSI2 bridge node. + $ref: cdns,csi2rx.yaml# + +required: + - compatible + - reg + - dmas + - dma-names + - power-domains + - ranges + - "#address-cells" + - "#size-cells" + +additionalProperties: false + +examples: + - | + #include + + ti_csi2rx0: ticsi2rx@4500000 { + compatible = "ti,j721e-csi2rx-shim"; + dmas = <&main_udmap 0x4940>; + dma-names = "rx0"; + reg = <0x4500000 0x1000>; + power-domains = <&k3_pds 26 TI_SCI_PD_EXCLUSIVE>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + cdns_csi2rx: csi-bridge@4504000 { + compatible = "ti,j721e-csi2rx", "cdns,csi2rx"; + reg = <0x4504000 0x1000>; + clocks = <&k3_clks 26 2>, <&k3_clks 26 0>, <&k3_clks 26 2>, + <&k3_clks 26 2>, <&k3_clks 26 3>, <&k3_clks 26 3>; + clock-names = "sys_clk", "p_clk", "pixel_if0_clk", + "pixel_if1_clk", "pixel_if2_clk", "pixel_if3_clk"; + phys = <&dphy0>; + phy-names = "dphy"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + csi2_0: port@0 { + + reg = <0>; + + csi2rx0_in_sensor: endpoint { + remote-endpoint = <&csi2_cam0>; + bus-type = <4>; /* CSI2 DPHY. 
*/ + clock-lanes = <0>; + data-lanes = <1 2>; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/media/video-interfaces.yaml b/Documentation/devicetree/bindings/media/video-interfaces.yaml index a211d49dc2..26e3e7d7c6 100644 --- a/Documentation/devicetree/bindings/media/video-interfaces.yaml +++ b/Documentation/devicetree/bindings/media/video-interfaces.yaml @@ -160,6 +160,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 1 maxItems: 8 + uniqueItems: true items: # Assume up to 9 physical lane indices maximum: 8 diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml index b40cec0eb6..ee74a362f4 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml @@ -40,6 +40,7 @@ patternProperties: ".*@[0-9]+$": type: object $ref: mc-peripheral-props.yaml# + additionalProperties: true required: - compatible diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml index 56e62cd0b3..25f3bb9890 100644 --- a/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml @@ -80,6 +80,8 @@ properties: patternProperties: "flash@[0-9a-f]+$": type: object + additionalProperties: true + properties: compatible: contains: diff --git a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml index 4e4af3cfc0..1f58ee99be 100644 --- a/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/rockchip,rk3399-dmc.yaml @@ -18,7 +18,7 @@ properties: $ref: 
/schemas/types.yaml#/definitions/phandle description: Node to get DDR loading. Refer to - Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt. + Documentation/devicetree/bindings/devfreq/event/rockchip,dfi.yaml. clocks: maxItems: 1 diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml index b049837ee6..c7a8a041da 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml @@ -130,7 +130,7 @@ patternProperties: bus. The device can be a NAND chip, SRAM device, NOR device or an ASIC. $ref: ti,gpmc-child.yaml - + additionalProperties: true required: - compatible diff --git a/Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml b/Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml new file mode 100644 index 0000000000..12f8e9f350 --- /dev/null +++ b/Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/memory-controllers/xlnx,versal-ddrmc-edac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx Versal DDRMC (Integrated DDR Memory Controller) + +maintainers: + - Shubhrajyoti Datta + - Sai Krishna Potthuri + +description: + The integrated DDR Memory Controllers (DDRMCs) support both DDR4 and LPDDR4/ + 4X memory interfaces. Versal DDR memory controller has optional ECC support + which corrects single bit ECC errors and detects double bit ECC errors.
+ +properties: + compatible: + const: xlnx,versal-ddrmc + + reg: + items: + - description: DDR Memory Controller registers + - description: NOC registers corresponding to DDR Memory Controller + + reg-names: + items: + - const: base + - const: noc + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + +additionalProperties: false + +examples: + - | + #include + + bus { + #address-cells = <2>; + #size-cells = <2>; + memory-controller@f6150000 { + compatible = "xlnx,versal-ddrmc"; + reg = <0x0 0xf6150000 0x0 0x2000>, <0x0 0xf6070000 0x0 0x20000>; + reg-names = "base", "noc"; + interrupt-parent = <&gic>; + interrupts = ; + }; + }; diff --git a/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml b/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml new file mode 100644 index 0000000000..46b164ae08 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/arm,dev-platforms-syscon.yaml @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/arm,dev-platforms-syscon.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm Ltd Developer Platforms System Controllers + +maintainers: + - Linus Walleij + +description: + The Arm Ltd Integrator, Realview, and Versatile families of developer + platforms contain various system controller blocks. Often these blocks + are part of a daughterboard or motherboard module.
+ +properties: + compatible: + oneOf: + - items: + - enum: + - arm,integrator-ap-syscon + - arm,integrator-cp-syscon + - arm,integrator-sp-syscon + - arm,im-pd1-syscon + - const: syscon + - items: + - enum: + - arm,core-module-integrator + - arm,integrator-ap-syscon + - arm,integrator-cp-syscon + - arm,integrator-sp-syscon + - arm,realview-eb-syscon + - arm,realview-pb1176-syscon + - arm,realview-pb11mp-syscon + - arm,realview-pba8-syscon + - arm,realview-pbx-syscon + - arm,versatile-ib2-syscon + - const: syscon + - const: simple-mfd + - items: + - enum: + - arm,realview-eb11mp-revb-syscon + - arm,realview-eb11mp-revc-syscon + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd + + reg: + maxItems: 1 + + ranges: true + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + +required: + - compatible + - reg + +additionalProperties: + type: object + +... diff --git a/Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml b/Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml index c14def1b2a..9c2a04829d 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml @@ -148,47 +148,47 @@ examples: pinctrl_nand: nand-pins { function = "nand"; - group = "nand_grp"; + pins = "nand_grp"; }; pinctrl_gpio35_alt: gpio35_alt-pins { function = "gpio35_alt"; - pin = "gpio35"; + pins = "gpio35"; }; pinctrl_dectpd: dectpd-pins { function = "dectpd"; - group = "dectpd_grp"; + pins = "dectpd_grp"; }; pinctrl_vdsl_phy_override_0: vdsl_phy_override_0-pins { function = "vdsl_phy_override_0"; - group = "vdsl_phy_override_0_grp"; + pins = "vdsl_phy_override_0_grp"; }; pinctrl_vdsl_phy_override_1: vdsl_phy_override_1-pins { function = "vdsl_phy_override_1"; - group = "vdsl_phy_override_1_grp"; + pins = "vdsl_phy_override_1_grp"; }; pinctrl_vdsl_phy_override_2: vdsl_phy_override_2-pins { function = "vdsl_phy_override_2"; - group = 
"vdsl_phy_override_2_grp"; + pins = "vdsl_phy_override_2_grp"; }; pinctrl_vdsl_phy_override_3: vdsl_phy_override_3-pins { function = "vdsl_phy_override_3"; - group = "vdsl_phy_override_3_grp"; + pins = "vdsl_phy_override_3_grp"; }; pinctrl_dsl_gpio8: dsl_gpio8-pins { function = "dsl_gpio8"; - group = "dsl_gpio8"; + pins = "dsl_gpio8"; }; pinctrl_dsl_gpio9: dsl_gpio9-pins { function = "dsl_gpio9"; - group = "dsl_gpio9"; + pins = "dsl_gpio9"; }; }; }; diff --git a/Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml b/Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml index 4d594739b3..c2941638c8 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml @@ -230,7 +230,7 @@ examples: pinctrl_nand: nand-pins { function = "nand"; - group = "nand_grp"; + pins = "nand_grp"; }; }; }; diff --git a/Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml b/Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml index aae83d4328..44e77d77d3 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml @@ -240,7 +240,7 @@ examples: pinctrl_uart1: uart1-pins { function = "uart1"; - group = "uart1_grp"; + pins = "uart1_grp"; }; }; }; diff --git a/Documentation/devicetree/bindings/mfd/max8925.txt b/Documentation/devicetree/bindings/mfd/max8925.txt deleted file mode 100644 index 4f0dc6638e..0000000000 --- a/Documentation/devicetree/bindings/mfd/max8925.txt +++ /dev/null @@ -1,64 +0,0 @@ -* Maxim max8925 Power Management IC - -Required parent device properties: -- compatible : "maxim,max8925" -- reg : the I2C slave address for the max8925 chip -- interrupts : IRQ line for the max8925 chip -- interrupt-controller: describes the max8925 as an interrupt - controller (has its own domain) -- #interrupt-cells : should be 1. 
- - The cell is the max8925 local IRQ number - -Optional parent device properties: -- maxim,tsc-irq: there are 2 IRQ lines for max8925, one is indicated in - interrupts property, the other is indicated here. - -max8925 consists of a large and varied group of sub-devices: - -Device Supply Names Description ------- ------------ ----------- -max8925-onkey : : On key -max8925-rtc : : RTC -max8925-regulator : : Regulators -max8925-backlight : : Backlight -max8925-touch : : Touchscreen -max8925-power : : Charger - -Example: - - pmic: max8925@3c { - compatible = "maxim,max8925"; - reg = <0x3c>; - interrupts = <1>; - interrupt-parent = <&intcmux4>; - interrupt-controller; - #interrupt-cells = <1>; - maxim,tsc-irq = <0>; - - regulators { - SDV1 { - regulator-min-microvolt = <637500>; - regulator-max-microvolt = <1425000>; - regulator-boot-on; - regulator-always-on; - }; - - LDO1 { - regulator-min-microvolt = <750000>; - regulator-max-microvolt = <3900000>; - regulator-boot-on; - regulator-always-on; - }; - - }; - backlight { - maxim,max8925-dual-string = <0>; - }; - charger { - batt-detect = <0>; - topoff-threshold = <1>; - fast-charge = <7>; - no-temp-support = <0>; - no-insert-detect = <0>; - }; - }; diff --git a/Documentation/devicetree/bindings/mfd/max8998.txt b/Documentation/devicetree/bindings/mfd/max8998.txt deleted file mode 100644 index 4ed52184d0..0000000000 --- a/Documentation/devicetree/bindings/mfd/max8998.txt +++ /dev/null @@ -1,125 +0,0 @@ -* Maxim MAX8998, National/TI LP3974 multi-function device - -The Maxim MAX8998 is a multi-function device which includes voltage/current -regulators, real time clock, battery charging controller and several -other sub-blocks. It is interfaced using an I2C interface. Each sub-block -is addressed by the host system using different i2c slave address. - -PMIC sub-block --------------- - -The PMIC sub-block contains a number of voltage and current regulators, -with controllable parameters and dynamic voltage scaling capability. 
-In addition, it includes a real time clock and battery charging controller -as well. It is accessible at I2C address 0x66. - -Required properties: -- compatible: Should be one of the following: - - "maxim,max8998" for Maxim MAX8998 - - "national,lp3974" or "ti,lp3974" for National/TI LP3974. -- reg: Specifies the i2c slave address of the pmic block. It should be 0x66. - -Optional properties: -- interrupts: Interrupt specifiers for two interrupt sources. - - First interrupt specifier is for main interrupt. - - Second interrupt specifier is for power-on/-off interrupt. -- max8998,pmic-buck1-dvs-gpios: GPIO specifiers for two host gpios used - for buck 1 dvs. The format of the gpio specifier depends on the gpio - controller. -- max8998,pmic-buck2-dvs-gpio: GPIO specifier for host gpio used - for buck 2 dvs. The format of the gpio specifier depends on the gpio - controller. -- max8998,pmic-buck1-default-dvs-idx: Default voltage setting selected from - the possible 4 options selectable by the dvs gpios. The value of this - property should be 0, 1, 2 or 3. If not specified or out of range, - a default value of 0 is taken. -- max8998,pmic-buck2-default-dvs-idx: Default voltage setting selected from - the possible 2 options selectable by the dvs gpios. The value of this - property should be 0 or 1. If not specified or out of range, a default - value of 0 is taken. -- max8998,pmic-buck-voltage-lock: If present, disallows changing of - preprogrammed buck dvfs voltages. - -Additional properties required if max8998,pmic-buck1-dvs-gpios is defined: -- max8998,pmic-buck1-dvs-voltage: An array of 4 voltage values in microvolts - for buck1 regulator that can be selected using dvs gpio. - -Additional properties required if max8998,pmic-buck2-dvs-gpio is defined: -- max8998,pmic-buck2-dvs-voltage: An array of 2 voltage values in microvolts - for buck2 regulator that can be selected using dvs gpio. 
- -Regulators: All the regulators of MAX8998 to be instantiated shall be -listed in a child node named 'regulators'. Each regulator is represented -by a child node of the 'regulators' node. - - regulator-name { - /* standard regulator bindings here */ - }; - -Following regulators of the MAX8998 PMIC block are supported. Note that -the 'n' in regulator name, as in LDOn or BUCKn, represents the LDO or BUCK -number as described in MAX8998 datasheet. - - - LDOn - - valid values for n are 2 to 17 - - Example: LDO2, LDO10, LDO17 - - BUCKn - - valid values for n are 1 to 4. - - Example: BUCK1, BUCK2, BUCK3, BUCK4 - - - ENVICHG: Battery Charging Current Monitor Output. This is a fixed - voltage type regulator - - - ESAFEOUT1: (ldo19) - - ESAFEOUT2: (ld020) - - - CHARGER: main battery charger current control - -Standard regulator bindings are used inside regulator subnodes. Check - Documentation/devicetree/bindings/regulator/regulator.txt -for more details. - -Example: - - pmic@66 { - compatible = "maxim,max8998-pmic"; - reg = <0x66>; - interrupt-parent = <&wakeup_eint>; - interrupts = <4 0>, <3 0>; - - /* Buck 1 DVS settings */ - max8998,pmic-buck1-default-dvs-idx = <0>; - max8998,pmic-buck1-dvs-gpios = <&gpx0 0 1 0 0>, /* SET1 */ - <&gpx0 1 1 0 0>; /* SET2 */ - max8998,pmic-buck1-dvs-voltage = <1350000>, <1300000>, - <1000000>, <950000>; - - /* Buck 2 DVS settings */ - max8998,pmic-buck2-default-dvs-idx = <0>; - max8998,pmic-buck2-dvs-gpio = <&gpx0 0 3 0 0>; /* SET3 */ - max8998,pmic-buck2-dvs-voltage = <1350000>, <1300000>; - - /* Regulators to instantiate */ - regulators { - ldo2_reg: LDO2 { - regulator-name = "VDD_ALIVE_1.1V"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; - regulator-always-on; - }; - - buck1_reg: BUCK1 { - regulator-name = "VDD_ARM_1.2V"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - }; - - charger_reg: CHARGER { - regulator-name = "CHARGER"; - 
regulator-min-microamp = <90000>; - regulator-max-microamp = <800000>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/mfd/maxim,max5970.yaml b/Documentation/devicetree/bindings/mfd/maxim,max5970.yaml index da67742c5a..0da5cae385 100644 --- a/Documentation/devicetree/bindings/mfd/maxim,max5970.yaml +++ b/Documentation/devicetree/bindings/mfd/maxim,max5970.yaml @@ -45,8 +45,13 @@ properties: patternProperties: "^led@[0-3]$": $ref: /schemas/leds/common.yaml# + unevaluatedProperties: false type: object + properties: + reg: + maximum: 3 + additionalProperties: false vss1-supply: diff --git a/Documentation/devicetree/bindings/mfd/maxim,max8925.yaml b/Documentation/devicetree/bindings/mfd/maxim,max8925.yaml new file mode 100644 index 0000000000..86dd810851 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/maxim,max8925.yaml @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/maxim,max8925.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MAX8925 PMIC from Maxim Integrated. 
+ +maintainers: + - Lee Jones + +properties: + compatible: + const: maxim,max8925 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-controller: true + + "#interrupt-cells": + const: 1 + description: + The cell is the IRQ number + + maxim,tsc-irq: + description: second interrupt from max8925 + $ref: /schemas/types.yaml#/definitions/uint32 + + regulators: + type: object + + patternProperties: + "^SDV[1-3]$|^LDO[1-9]$|^LDO1[0-9]$|^LDO20$": + description: regulator configuration for SDV1-3 and LDO1-20 + $ref: /schemas/regulator/regulator.yaml + unevaluatedProperties: false + + additionalProperties: false + + backlight: + type: object + properties: + maxim,max8925-dual-string: + description: set to 1 to support dual string + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + default: 0 + + additionalProperties: false + + charger: + type: object + properties: + batt-detect: + description: set to 1 if battery detection via ID pin is supported + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + default: 0 + + topoff-threshold: + description: charging current in topoff mode, configures bits 5-6 in CHG_CNTL1 + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + default: 0 + + fast-charge: + description: set charging current in fast mode, configures bits 0-3 in CHG_CNTL1 + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + default: 0 + + no-temp-support: + description: set to 1 if temperature sensing is not supported + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + default: 0 + + no-insert-detect: + description: set to 1 if AC detection is not supported + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + default: 0 + + additionalProperties: false + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - "#interrupt-cells" + - regulators + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + 
+ pmic@3c { + compatible = "maxim,max8925"; + reg = <0x3c>; + interrupts = <1>; + interrupt-parent = <&intcmux4>; + interrupt-controller; + #interrupt-cells = <1>; + maxim,tsc-irq = <0>; + + regulators { + SDV1 { + regulator-min-microvolt = <637500>; + regulator-max-microvolt = <1425000>; + regulator-boot-on; + regulator-always-on; + }; + + LDO1 { + regulator-min-microvolt = <750000>; + regulator-max-microvolt = <3900000>; + regulator-boot-on; + regulator-always-on; + }; + }; + + backlight { + maxim,max8925-dual-string = <0>; + }; + + charger { + batt-detect = <0>; + topoff-threshold = <1>; + fast-charge = <7>; + no-temp-support = <0>; + no-insert-detect = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/mfd/maxim,max8998.yaml b/Documentation/devicetree/bindings/mfd/maxim,max8998.yaml new file mode 100644 index 0000000000..f3c3f64fd0 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/maxim,max8998.yaml @@ -0,0 +1,324 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/maxim,max8998.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Maxim MAX8998, National/TI LP3974 Power Management IC + +maintainers: + - Krzysztof Kozlowski + +description: + The Maxim MAX8998 is a Power Management IC which includes voltage/current + regulators, real time clock, battery charging controller and several other + sub-blocks. It is interfaced using an I2C interface. Each sub-block is + addressed by the host system using different i2c slave address. + +properties: + compatible: + enum: + - maxim,max8998 + - national,lp3974 + - ti,lp3974 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + items: + - description: Main interrupt + - description: Power-on/-off interrupt + + max8998,pmic-buck1-dvs-gpios: + maxItems: 2 + description: + Two host gpios used for buck1 DVS. + + max8998,pmic-buck2-dvs-gpio: + maxItems: 1 + description: + Host gpio used for buck2 DVS. 
+ + max8998,pmic-buck1-default-dvs-idx: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 0 + description: + Default voltage setting selected from the possible 4 options selectable + by the DVS gpios. + + max8998,pmic-buck2-default-dvs-idx: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + default: 0 + description: + Default voltage setting selected from the possible 2 options selectable + by the DVS GPIOs. + + max8998,pmic-buck-voltage-lock: + type: boolean + description: + If present, disallows changing of preprogrammed buck DVS voltages. + + max8998,pmic-buck1-dvs-voltage: + $ref: /schemas/types.yaml#/definitions/uint32-array + maxItems: 4 + description: + Four voltage values in microvolts for buck1 regulator that can be + selected using DVS GPIO. + + max8998,pmic-buck2-dvs-voltage: + $ref: /schemas/types.yaml#/definitions/uint32-array + maxItems: 2 + description: + Two voltage values in microvolts for buck2 regulator that can be + selected using DVS GPIO. + + regulators: + type: object + additionalProperties: false + + properties: + CHARGER: + type: object + $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false + description: + CHARGER is main battery charger current control, wrongly represented + as regulator. + + properties: + regulator-min-microamp: + minimum: 90000 + maximum: 800000 + + regulator-max-microamp: + minimum: 90000 + maximum: 800000 + + regulator-min-microvolt: false + regulator-max-microvolt: false + + required: + - regulator-name + + patternProperties: + "^(LDO([2-9]|1[0-7])|BUCK[1-4])$": + type: object + $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false + + required: + - regulator-name + + "^(EN32KHz-AP|EN32KHz-CP|ENVICHG|ESAFEOUT[12])$": + type: object + $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false + description: | + EN32KHz-AP and EN32KHz-CP are 32768 Hz clocks, wrongly represented as + regulators. 
+ ENVICHG is a Battery Charging Current Monitor Output. + + properties: + regulator-min-microvolt: false + regulator-max-microvolt: false + + required: + - regulator-name + +dependencies: + max8998,pmic-buck1-dvs-gpios: [ "max8998,pmic-buck1-dvs-voltage" ] + max8998,pmic-buck2-dvs-gpio: [ "max8998,pmic-buck2-dvs-voltage" ] + +required: + - compatible + - reg + - regulators + +additionalProperties: false + +examples: + - | + #include + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + pmic@66 { + compatible = "national,lp3974"; + reg = <0x66>; + interrupts-extended = <&gpx0 7 IRQ_TYPE_LEVEL_LOW>, + <&gpx2 7 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&lp3974_irq>; + + max8998,pmic-buck1-default-dvs-idx = <0>; + max8998,pmic-buck1-dvs-gpios = <&gpx0 5 GPIO_ACTIVE_HIGH>, + <&gpx0 6 GPIO_ACTIVE_HIGH>; + max8998,pmic-buck1-dvs-voltage = <1100000>, <1000000>, + <1100000>, <1000000>; + max8998,pmic-buck2-default-dvs-idx = <0>; + max8998,pmic-buck2-dvs-gpio = <&gpe2 0 GPIO_ACTIVE_HIGH>; + max8998,pmic-buck2-dvs-voltage = <1200000>, <1100000>; + + regulators { + LDO2 { + regulator-name = "VALIVE_1.2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + }; + + LDO3 { + regulator-name = "VUSB+MIPI_1.1V"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + + LDO4 { + regulator-name = "VADC_3.3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + LDO5 { + regulator-name = "VTF_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + }; + + LDO6 { + regulator-name = "LDO6"; + regulator-min-microvolt = <2000000>; + regulator-max-microvolt = <2000000>; + }; + + LDO7 { + regulator-name = "VLCD+VMIPI_1.8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + LDO8 { + regulator-name = "VUSB+VDAC_3.3V"; + regulator-min-microvolt = 
<3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + LDO9 { + regulator-name = "VCC_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + }; + + LDO10 { + regulator-name = "VPLL_1.1V"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-boot-on; + regulator-always-on; + }; + + LDO11 { + regulator-name = "CAM_AF_3.3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + LDO12 { + regulator-name = "PS_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + }; + + LDO13 { + regulator-name = "VHIC_1.2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + }; + + LDO14 { + regulator-name = "CAM_I_HOST_1.8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + LDO15 { + regulator-name = "CAM_S_DIG+FM33_CORE_1.2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + }; + + LDO16 { + regulator-name = "CAM_S_ANA_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + }; + + LDO17 { + regulator-name = "VCC_3.0V_LCD"; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; + }; + + BUCK1 { + regulator-name = "VINT_1.1V"; + regulator-min-microvolt = <750000>; + regulator-max-microvolt = <1500000>; + regulator-boot-on; + regulator-always-on; + }; + + BUCK2 { + regulator-name = "VG3D_1.1V"; + regulator-min-microvolt = <750000>; + regulator-max-microvolt = <1500000>; + regulator-boot-on; + }; + + BUCK3 { + regulator-name = "VCC_1.8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + BUCK4 { + regulator-name = "VMEM_1.2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + }; + + EN32KHz-AP { + regulator-name = "32KHz AP"; + 
regulator-always-on; + }; + + EN32KHz-CP { + regulator-name = "32KHz CP"; + }; + + ENVICHG { + regulator-name = "VICHG"; + }; + + ESAFEOUT1 { + regulator-name = "SAFEOUT1"; + }; + + ESAFEOUT2 { + regulator-name = "SAFEOUT2"; + regulator-boot-on; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml b/Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml index fc2a53148e..37423c2e0f 100644 --- a/Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml +++ b/Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml @@ -40,6 +40,7 @@ properties: regulators: type: object $ref: /schemas/regulator/mediatek,mt6357-regulator.yaml + unevaluatedProperties: false description: List of MT6357 BUCKs and LDOs regulators. @@ -59,6 +60,7 @@ properties: keys: type: object $ref: /schemas/input/mediatek,pmic-keys.yaml + unevaluatedProperties: false description: MT6357 power and home keys. diff --git a/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml index 9f03436b1c..9fa5686039 100644 --- a/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml +++ b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml @@ -58,6 +58,7 @@ properties: - qcom,pm8350 - qcom,pm8350b - qcom,pm8350c + - qcom,pm8450 - qcom,pm8550 - qcom,pm8550b - qcom,pm8550ve @@ -168,6 +169,10 @@ patternProperties: type: object $ref: /schemas/thermal/qcom,spmi-temp-alarm.yaml# + "^typec@[0-9a-f]+$": + type: object + $ref: /schemas/usb/qcom,pmic-typec.yaml# + "^usb-detect@[0-9a-f]+$": type: object $ref: /schemas/extcon/qcom,pm8941-misc.yaml# @@ -234,13 +239,13 @@ examples: interrupt-controller; #interrupt-cells = <4>; - pmi8998_lsid0: pmic@2 { + pmic@2 { compatible = "qcom,pmi8998", "qcom,spmi-pmic"; reg = <0x2 SPMI_USID>; #address-cells = <1>; #size-cells = <0>; - pmi8998_gpio: gpio@c000 { + gpio@c000 { compatible = "qcom,pmi8998-gpio", "qcom,spmi-gpio"; reg = <0xc000>; gpio-controller; @@ -325,7 +330,7 
@@ examples: }; }; - pm6150_gpio: gpio@c000 { + gpio@c000 { compatible = "qcom,pm6150-gpio", "qcom,spmi-gpio"; reg = <0xc000>; gpio-controller; diff --git a/Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml b/Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml index 5ad9d5deaa..33c3d023a1 100644 --- a/Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml +++ b/Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml @@ -27,6 +27,7 @@ properties: - qcom,sdm845-tcsr - qcom,sdx55-tcsr - qcom,sdx65-tcsr + - qcom,sm4450-tcsr - qcom,sm8150-tcsr - qcom,sm8450-tcsr - qcom,tcsr-apq8064 diff --git a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml index 9c51c1b190..7fe3875a59 100644 --- a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml +++ b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml @@ -43,13 +43,37 @@ properties: interrupt-controller: true patternProperties: + "gpio@[0-9a-f]+$": + type: object + $ref: /schemas/pinctrl/qcom,pmic-gpio.yaml# + + "keypad@[0-9a-f]+$": + type: object + $ref: /schemas/input/qcom,pm8921-keypad.yaml# + "led@[0-9a-f]+$": type: object $ref: /schemas/leds/qcom,pm8058-led.yaml# + "mpps@[0-9a-f]+$": + type: object + $ref: /schemas/pinctrl/qcom,pmic-mpp.yaml# + + "pwrkey@[0-9a-f]+$": + type: object + $ref: /schemas/input/qcom,pm8921-pwrkey.yaml# + "rtc@[0-9a-f]+$": type: object - $ref: ../rtc/qcom-pm8xxx-rtc.yaml + $ref: /schemas/rtc/qcom-pm8xxx-rtc.yaml# + + "vibrator@[0-9a-f]+$": + type: object + $ref: /schemas/input/qcom,pm8xxx-vib.yaml# + + "xoadc@[0-9a-f]+$": + type: object + $ref: /schemas/iio/adc/qcom,pm8018-adc.yaml# required: - compatible diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml index 4992f71b6f..44f8188360 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml @@ -42,9 +42,12 @@ properties: 
rockchip,system-power-controller: type: boolean + deprecated: true description: Telling whether or not this PMIC is controlling the system power. + system-power-controller: true + wakeup-source: type: boolean description: @@ -80,6 +83,7 @@ properties: "^(DCDC_REG[1-4]|LDO_REG[1-3])$": type: object $ref: ../regulator/regulator.yaml# + unevaluatedProperties: false unevaluatedProperties: false allOf: diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml index cf2500f2e9..3c2b06629b 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml @@ -29,6 +29,8 @@ properties: '#gpio-cells': const: 2 + system-power-controller: true + vcc1-supply: description: The input supply for dcdc-reg1. diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml index f5908fa01a..d2ac6fbd5c 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml @@ -37,9 +37,12 @@ properties: rockchip,system-power-controller: type: boolean + deprecated: true description: Telling whether or not this PMIC is controlling the system power. 
+ system-power-controller: true + wakeup-source: type: boolean description: @@ -107,6 +110,7 @@ properties: "^(DCDC_REG[1-4]|LDO_REG[1-8]|SWITCH_REG[1-2])$": type: object $ref: ../regulator/regulator.yaml# + unevaluatedProperties: false unevaluatedProperties: false required: diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk809.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk809.yaml index 7fb849ac74..839c0521f1 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk809.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk809.yaml @@ -37,9 +37,12 @@ properties: rockchip,system-power-controller: type: boolean + deprecated: true description: Telling whether or not this PMIC is controlling the system power. + system-power-controller: true + wakeup-source: type: boolean description: @@ -86,7 +89,8 @@ properties: patternProperties: "^(LDO_REG[1-9]|DCDC_REG[1-5]|SWITCH_REG[1-2])$": type: object - $ref: ../regulator/regulator.yaml# + $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false unevaluatedProperties: false allOf: diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml index 269fb85b20..92b1592e89 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml @@ -38,9 +38,12 @@ properties: rockchip,system-power-controller: type: boolean + deprecated: true description: Telling whether or not this PMIC is controlling the system power. 
+ system-power-controller: true + wakeup-source: type: boolean description: diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml index b57c4b005c..fd4b9de364 100644 --- a/Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml +++ b/Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml @@ -37,9 +37,12 @@ properties: rockchip,system-power-controller: type: boolean + deprecated: true description: Telling whether or not this PMIC is controlling the system power. + system-power-controller: true + wakeup-source: type: boolean description: @@ -99,6 +102,7 @@ properties: "^(DCDC_REG[1-4]|DCDC_BOOST|LDO_REG[1-9]|SWITCH_REG|HDMI_SWITCH|OTG_SWITCH)$": type: object $ref: ../regulator/regulator.yaml# + unevaluatedProperties: false unevaluatedProperties: false required: diff --git a/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml b/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml index 5e0002f099..cb2a42caab 100644 --- a/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml +++ b/Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml @@ -75,7 +75,7 @@ properties: unevaluatedProperties: false db8500_varm: - description: The voltage for the ARM Cortex A-9 CPU. + description: The voltage for the ARM Cortex-A9 CPU. 
type: object $ref: ../regulator/regulator.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml index 8103154bbb..084b5c2a2a 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.yaml +++ b/Documentation/devicetree/bindings/mfd/syscon.yaml @@ -49,6 +49,8 @@ properties: - hisilicon,peri-subctrl - hpe,gxp-sysreg - intel,lgm-syscon + - loongson,ls1b-syscon + - loongson,ls1c-syscon - marvell,armada-3700-usb2-host-misc - mediatek,mt8135-pctl-a-syscfg - mediatek,mt8135-pctl-b-syscfg @@ -61,6 +63,7 @@ properties: - rockchip,px30-qos - rockchip,rk3036-qos - rockchip,rk3066-qos + - rockchip,rk3128-qos - rockchip,rk3228-qos - rockchip,rk3288-qos - rockchip,rk3368-qos @@ -69,6 +72,7 @@ properties: - rockchip,rk3588-qos - rockchip,rv1126-qos - starfive,jh7100-sysmain + - ti,am654-dss-oldi-io-ctrl - const: syscon diff --git a/Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml b/Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml index f6cac4b107..ae149eb859 100644 --- a/Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml @@ -37,6 +37,7 @@ properties: "^buck[0123]$": type: object $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false required: - buck0 diff --git a/Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml b/Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml index dc5a29b5ef..5167d6eb90 100644 --- a/Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml @@ -41,6 +41,7 @@ properties: buck3210: type: object $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false required: - buck3210 diff --git a/Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml b/Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml index 012d251110..eca430edf6 100644 --- 
a/Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml +++ b/Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml @@ -47,6 +47,7 @@ properties: "^buck(10|23)$": type: object $ref: /schemas/regulator/regulator.yaml# + unevaluatedProperties: false required: - buck10 diff --git a/Documentation/devicetree/bindings/mfd/ti,twl.yaml b/Documentation/devicetree/bindings/mfd/ti,twl.yaml new file mode 100644 index 0000000000..c04d57ba22 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/ti,twl.yaml @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/ti,twl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments TWL family + +maintainers: + - Andreas Kemnade + +description: | + The TWLs are Integrated Power Management Chips. + Some version might contain much more analog function like + USB transceiver or Audio amplifier. + These chips are connected to an i2c bus. + +properties: + compatible: + description: + TWL4030 for integrated power-management/audio CODEC device used in OMAP3 + based boards + TWL6030/32 for integrated power-management used in OMAP4 based boards + enum: + - ti,twl4030 + - ti,twl6030 + - ti,twl6032 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-controller: true + + "#interrupt-cells": + const: 1 + + "#clock-cells": + const: 1 + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - "#interrupt-cells" + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + pmic@48 { + compatible = "ti,twl6030"; + reg = <0x48>; + interrupts = <39>; /* IRQ_SYS_1N cascaded to gic */ + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&gic>; + }; + }; + diff --git a/Documentation/devicetree/bindings/mfd/twl-family.txt b/Documentation/devicetree/bindings/mfd/twl-family.txt deleted file mode 100644 index c2f9302965..0000000000 --- 
a/Documentation/devicetree/bindings/mfd/twl-family.txt +++ /dev/null @@ -1,46 +0,0 @@ -Texas Instruments TWL family - -The TWLs are Integrated Power Management Chips. -Some version might contain much more analog function like -USB transceiver or Audio amplifier. -These chips are connected to an i2c bus. - - -Required properties: -- compatible : Must be "ti,twl4030"; - For Integrated power-management/audio CODEC device used in OMAP3 - based boards -- compatible : Must be "ti,twl6030"; - For Integrated power-management used in OMAP4 based boards -- interrupts : This i2c device has an IRQ line connected to the main SoC -- interrupt-controller : Since the twl support several interrupts internally, - it is considered as an interrupt controller cascaded to the SoC one. -- #interrupt-cells = <1>; - -Optional node: -- Child nodes contain in the twl. The twl family is made of several variants - that support a different number of features. - The children nodes will thus depend of the capability of the variant. 
- - -Example: -/* - * Integrated Power Management Chip - * https://www.ti.com/lit/ds/symlink/twl6030.pdf - */ -twl@48 { - compatible = "ti,twl6030"; - reg = <0x48>; - interrupts = <39>; /* IRQ_SYS_1N cascaded to gic */ - interrupt-controller; - #interrupt-cells = <1>; - interrupt-parent = <&gic>; - #address-cells = <1>; - #size-cells = <0>; - - twl_rtc { - compatible = "ti,twl_rtc"; - interrupts = <11>; - reg = <0>; - }; -}; diff --git a/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml b/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml index 9ad5574613..06f1779835 100644 --- a/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml +++ b/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml @@ -67,7 +67,10 @@ allOf: properties: compatible: contains: - const: x-powers,axp305 + enum: + - x-powers,axp15060 + - x-powers,axp305 + - x-powers,axp313a then: required: diff --git a/Documentation/devicetree/bindings/mmc/npcm,sdhci.yaml b/Documentation/devicetree/bindings/mmc/npcm,sdhci.yaml new file mode 100644 index 0000000000..196fdbfa16 --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/npcm,sdhci.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mmc/npcm,sdhci.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NPCM SDHCI Controller + +maintainers: + - Tomer Maimon + +allOf: + - $ref: mmc-controller.yaml# + +properties: + compatible: + enum: + - nuvoton,npcm750-sdhci + - nuvoton,npcm845-sdhci + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + mmc@f0840000 { + compatible = "nuvoton,npcm750-sdhci"; + reg = <0xf0840000 0x200>; + interrupts = <0 27 4>; + clocks = <&clk 4>; + }; diff --git a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml 
b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml index 7756a8687e..94e2287876 100644 --- a/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml +++ b/Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml @@ -59,6 +59,7 @@ properties: - renesas,sdhi-r9a07g043 # RZ/G2UL - renesas,sdhi-r9a07g044 # RZ/G2{L,LC} - renesas,sdhi-r9a07g054 # RZ/V2L + - renesas,sdhi-r9a08g045 # RZ/G3S - renesas,sdhi-r9a09g011 # RZ/V2M - const: renesas,rcar-gen3-sdhi # R-Car Gen3 or RZ/G2 - items: @@ -122,6 +123,7 @@ allOf: - renesas,sdhi-r9a07g043 - renesas,sdhi-r9a07g044 - renesas,sdhi-r9a07g054 + - renesas,sdhi-r9a08g045 - renesas,sdhi-r9a09g011 then: properties: diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml index 10f34aa8ba..86fae733d9 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml @@ -58,6 +58,7 @@ properties: - qcom,sm8350-sdhci - qcom,sm8450-sdhci - qcom,sm8550-sdhci + - qcom,sm8650-sdhci - const: qcom,sdhci-msm-v5 # for sdcc version 5.0 reg: @@ -85,10 +86,10 @@ properties: - const: iface - const: core - const: xo - - const: ice - - const: bus - - const: cal - - const: sleep + - enum: [ice, bus, cal, sleep] + - enum: [ice, bus, cal, sleep] + - enum: [ice, bus, cal, sleep] + - enum: [ice, bus, cal, sleep] dma-coherent: true diff --git a/Documentation/devicetree/bindings/mmc/starfive,jh7110-mmc.yaml b/Documentation/devicetree/bindings/mmc/starfive,jh7110-mmc.yaml index 51e1b04e79..553a75195c 100644 --- a/Documentation/devicetree/bindings/mmc/starfive,jh7110-mmc.yaml +++ b/Documentation/devicetree/bindings/mmc/starfive,jh7110-mmc.yaml @@ -55,7 +55,6 @@ required: - clocks - clock-names - interrupts - - starfive,sysreg unevaluatedProperties: false @@ -73,5 +72,4 @@ examples: fifo-depth = <32>; fifo-watermark-aligned; data-addr = <0>; - starfive,sysreg = <&sys_syscon 0x14 0x1a 0x7c000000>; }; diff --git 
a/Documentation/devicetree/bindings/mtd/mtd.yaml b/Documentation/devicetree/bindings/mtd/mtd.yaml index b82ca03e96..f322290ee5 100644 --- a/Documentation/devicetree/bindings/mtd/mtd.yaml +++ b/Documentation/devicetree/bindings/mtd/mtd.yaml @@ -43,7 +43,12 @@ patternProperties: deprecated: true "^otp(-[0-9]+)?$": - $ref: ../nvmem/nvmem.yaml# + type: object + + allOf: + - $ref: ../nvmem/nvmem.yaml# + - $ref: ../nvmem/nvmem-deprecated-cells.yaml# + unevaluatedProperties: false description: | diff --git a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml index 331e564f29..058253d6d8 100644 --- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml +++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml @@ -29,6 +29,24 @@ properties: "#size-cells": true + compression: + $ref: /schemas/types.yaml#/definitions/string + description: | + Compression algorithm used to store the data in this partition, chosen + from a list of well-known algorithms. + + The contents are compressed using this algorithm. 
+ + enum: + - none + - bzip2 + - gzip + - lzop + - lz4 + - lzma + - xz + - zstd + patternProperties: "@[0-9a-f]+$": $ref: partition.yaml# @@ -64,6 +82,7 @@ examples: uimage@100000 { reg = <0x0100000 0x200000>; + compress = "lzma"; }; }; diff --git a/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml b/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml index 5474d63268..9518281007 100644 --- a/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml +++ b/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml @@ -19,6 +19,7 @@ maintainers: allOf: - $ref: /schemas/mtd/partitions/partition.yaml# - $ref: /schemas/nvmem/nvmem.yaml# + - $ref: /schemas/nvmem/nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 4bfac91868..7fe0352dff 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -158,6 +158,8 @@ allOf: patternProperties: "^ethernet-phy@[0-9a-f]$": type: object + $ref: ethernet-phy.yaml# + unevaluatedProperties: false description: Integrated PHY node diff --git a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml index aa3162c748..75d8138298 100644 --- a/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml +++ b/Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml @@ -53,7 +53,7 @@ properties: const: 0 patternProperties: - "^port@[0-9]+$": + "^port@[0-9a-f]+$": type: object $ref: ethernet-controller.yaml# diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml index b06c416893..f21bdd0f40 100644 --- a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml +++ 
b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml @@ -78,6 +78,7 @@ properties: ports: type: object + additionalProperties: true patternProperties: '^port@[0-9a-f]$': diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index ec74a660be..6107189d27 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -40,17 +40,8 @@ $defs: patternProperties: "^(ethernet-)?ports$": - type: object - additionalProperties: false - - properties: - '#address-cells': - const: 1 - '#size-cells': - const: 0 - patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-9a-f]+$": description: Ethernet switch ports $ref: dsa-port.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml index e532c6b795..1c2444121e 100644 --- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml +++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml @@ -60,7 +60,7 @@ description: | Check out example 6. - - Port 5 can be wired to an external phy. Port 5 becomes a DSA slave. + - Port 5 can be wired to an external phy. Port 5 becomes a DSA user port. For the multi-chip module MT7530, the external phy must be wired TX to TX to gmac1 of the SoC for this to work. 
Ubiquiti EdgeRouter X SFP is wired @@ -154,10 +154,12 @@ properties: patternProperties: "^(ethernet-)?ports$": type: object + additionalProperties: true patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-6]$": type: object + additionalProperties: true properties: reg: @@ -184,7 +186,7 @@ $defs: patternProperties: "^(ethernet-)?ports$": patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-6]$": if: required: [ ethernet ] then: @@ -210,7 +212,7 @@ $defs: patternProperties: "^(ethernet-)?ports$": patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-6]$": if: required: [ ethernet ] then: diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml index 03b5567be3..b3029c64d0 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -38,6 +38,8 @@ properties: Should be a gpio specifier for a reset line. maxItems: 1 + wakeup-source: true + microchip,synclko-125: $ref: /schemas/types.yaml#/definitions/flag description: @@ -49,6 +51,26 @@ properties: Set if the output SYNCLKO clock should be disabled. Do not mix with microchip,synclko-125. + microchip,io-drive-strength-microamp: + description: + IO Pad Drive Strength + enum: [8000, 16000] + default: 16000 + + microchip,hi-drive-strength-microamp: + description: + High Speed Drive Strength. Controls drive strength of GMII / RGMII / + MII / RMII (except TX_CLK/REFCLKI, COL and CRS) and CLKO_25_125 lines. + enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000] + default: 24000 + + microchip,lo-drive-strength-microamp: + description: + Low Speed Drive Strength. Controls drive strength of TX_CLK / REFCLKI, + COL, CRS, LEDs, PME_N, NTRP_N, SDO and SDI/SDA/MDIO lines. 
+ enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000] + default: 8000 + interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml index 8d7e878b84..9973d64f15 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml @@ -37,8 +37,9 @@ properties: patternProperties: "^(ethernet-)?ports$": + additionalProperties: true patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-7]$": allOf: - if: properties: diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index 4d5f5cc6d0..9432565f4f 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -43,6 +43,7 @@ properties: # PHY 1. mdios: type: object + additionalProperties: false properties: '#address-cells': @@ -74,8 +75,9 @@ properties: patternProperties: "^(ethernet-)?ports$": + additionalProperties: true patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-9]$": allOf: - if: properties: diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml index df64eebebe..167398ab25 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml @@ -73,6 +73,7 @@ $ref: dsa.yaml# patternProperties: "^(ethernet-)?ports$": type: object + additionalProperties: true patternProperties: "^(ethernet-)?port@[0-6]$": type: object diff --git a/Documentation/devicetree/bindings/net/dsa/realtek.yaml b/Documentation/devicetree/bindings/net/dsa/realtek.yaml index cfd69c2604..cce692f57b 100644 --- a/Documentation/devicetree/bindings/net/dsa/realtek.yaml +++ b/Documentation/devicetree/bindings/net/dsa/realtek.yaml @@ 
-68,6 +68,8 @@ properties: interrupt-controller: type: object + additionalProperties: false + description: | This defines an interrupt controller with an IRQ line (typically a GPIO) that will demultiplex and handle the interrupt from the single diff --git a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml index 833d2f68da..ea285ef3e6 100644 --- a/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml +++ b/Documentation/devicetree/bindings/net/dsa/renesas,rzn1-a5psw.yaml @@ -61,17 +61,11 @@ properties: ethernet-ports: type: object - properties: - '#address-cells': - const: 1 - '#size-cells': - const: 0 - + additionalProperties: true patternProperties: "^(ethernet-)?port@[0-4]$": type: object - description: Ethernet switch ports - + additionalProperties: true properties: pcs-handle: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml index 82a5d7927c..34fd24ff6a 100644 --- a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml +++ b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml @@ -63,6 +63,7 @@ properties: mdio: type: object $ref: mdio.yaml# + unevaluatedProperties: false description: optional node for embedded MDIO controller required: diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 9f6a5ccbce..d14d123ad7 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -275,12 +275,12 @@ allOf: properties: rx-internal-delay-ps: description: - RGMII Receive Clock Delay defined in pico seconds.This is used for + RGMII Receive Clock Delay defined in pico seconds. This is used for controllers that have configurable RX internal delays. 
If this property is present then the MAC applies the RX delay. tx-internal-delay-ps: description: - RGMII Transmit Clock Delay defined in pico seconds.This is used for + RGMII Transmit Clock Delay defined in pico seconds. This is used for controllers that have configurable TX internal delays. If this property is present then the MAC applies the TX delay. diff --git a/Documentation/devicetree/bindings/net/ethernet-switch.yaml b/Documentation/devicetree/bindings/net/ethernet-switch.yaml index f1b9075dc7..72ac67ca34 100644 --- a/Documentation/devicetree/bindings/net/ethernet-switch.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-switch.yaml @@ -36,7 +36,7 @@ patternProperties: const: 0 patternProperties: - "^(ethernet-)?port@[0-9]+$": + "^(ethernet-)?port@[0-9a-f]+$": type: object description: Ethernet switch ports @@ -53,14 +53,16 @@ oneOf: additionalProperties: true $defs: - base: + ethernet-ports: description: An ethernet switch without any extra port properties $ref: '#' patternProperties: - "^(ethernet-)?port@[0-9]+$": - description: Ethernet switch ports - $ref: ethernet-switch-port.yaml# - unevaluatedProperties: false + "^(ethernet-)?ports$": + patternProperties: + "^(ethernet-)?port@[0-9a-f]+$": + description: Ethernet switch ports + $ref: ethernet-switch-port.yaml# + unevaluatedProperties: false ... 
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml index b494e00932..8948a11c99 100644 --- a/Documentation/devicetree/bindings/net/fsl,fec.yaml +++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -59,6 +59,7 @@ properties: - const: fsl,imx6sx-fec - items: - enum: + - fsl,imx8dxl-fec - fsl,imx8qxp-fec - const: fsl,imx8qm-fec - const: fsl,imx6sx-fec diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml new file mode 100644 index 0000000000..c4f3224bad --- /dev/null +++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/loongson,ls1b-gmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson-1B Gigabit Ethernet MAC Controller + +maintainers: + - Keguang Zhang + +description: | + Loongson-1B Gigabit Ethernet MAC Controller is based on + Synopsys DesignWare MAC (version 3.50a). + + Main features + - Dual 10/100/1000Mbps GMAC controllers + - Full-duplex operation (IEEE 802.3x flow control automatic transmission) + - Half-duplex operation (CSMA/CD Protocol and back-pressure support) + - RX Checksum Offload + - TX Checksum insertion + - MII interface + - RGMII interface + +select: + properties: + compatible: + contains: + enum: + - loongson,ls1b-gmac + required: + - compatible + +properties: + compatible: + items: + - enum: + - loongson,ls1b-gmac + - const: snps,dwmac-3.50a + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: stmmaceth + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + + loongson,ls1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon containing some extra configurations + including PHY interface mode. 
+ + phy-mode: + enum: + - mii + - rgmii-id + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - loongson,ls1-syscon + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + + gmac0: ethernet@1fe10000 { + compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a"; + reg = <0x1fe10000 0x10000>; + + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + + interrupt-parent = <&intc1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + + loongson,ls1-syscon = <&syscon>; + + phy-handle = <&phy0>; + phy-mode = "mii"; + snps,pbl = <1>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0x0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml new file mode 100644 index 0000000000..99001b940b --- /dev/null +++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/loongson,ls1c-emac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson-1C Ethernet MAC Controller + +maintainers: + - Keguang Zhang + +description: | + Loongson-1C Ethernet MAC Controller is based on + Synopsys DesignWare MAC (version 3.50a). 
+ + Main features + - 10/100Mbps + - Full-duplex operation (IEEE 802.3x flow control automatic transmission) + - Half-duplex operation (CSMA/CD Protocol and back-pressure support) + - IEEE 802.1Q VLAN tag detection for reception frames + - MII interface + - RMII interface + +select: + properties: + compatible: + contains: + enum: + - loongson,ls1c-emac + required: + - compatible + +properties: + compatible: + items: + - enum: + - loongson,ls1c-emac + - const: snps,dwmac-3.50a + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: stmmaceth + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + + loongson,ls1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon containing some extra configurations + including PHY interface mode. + + phy-mode: + enum: + - mii + - rmii + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - loongson,ls1-syscon + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + + emac: ethernet@1fe10000 { + compatible = "loongson,ls1c-emac", "snps,dwmac-3.50a"; + reg = <0x1fe10000 0x10000>; + + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + + interrupt-parent = <&intc1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + + loongson,ls1-syscon = <&syscon>; + + phy-handle = <&phy0>; + phy-mode = "mii"; + snps,pbl = <1>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@13 { + reg = <0x13>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml index 77c9bbf987..accff93d38 100644 --- a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml +++ b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml @@ -44,6 +44,8 @@ properties: local-mac-address: 
true mac-address: true + nvmem-cells: true + nvmem-cell-names: true required: - compatible diff --git a/Documentation/devicetree/bindings/net/mscc,vsc7514-switch.yaml b/Documentation/devicetree/bindings/net/mscc,vsc7514-switch.yaml index 8ee2c7d7ff..86a9c3fc76 100644 --- a/Documentation/devicetree/bindings/net/mscc,vsc7514-switch.yaml +++ b/Documentation/devicetree/bindings/net/mscc,vsc7514-switch.yaml @@ -24,7 +24,7 @@ allOf: compatible: const: mscc,vsc7514-switch then: - $ref: ethernet-switch.yaml# + $ref: ethernet-switch.yaml#/$defs/ethernet-ports required: - interrupts - interrupt-names @@ -33,28 +33,18 @@ allOf: minItems: 21 reg-names: minItems: 21 - ethernet-ports: - patternProperties: - "^port@[0-9a-f]+$": - $ref: ethernet-switch-port.yaml# - unevaluatedProperties: false - if: properties: compatible: const: mscc,vsc7512-switch then: - $ref: /schemas/net/dsa/dsa.yaml# + $ref: /schemas/net/dsa/dsa.yaml#/$defs/ethernet-ports properties: reg: maxItems: 20 reg-names: maxItems: 20 - ethernet-ports: - patternProperties: - "^port@[0-9a-f]+$": - $ref: /schemas/net/dsa/dsa-port.yaml# - unevaluatedProperties: false properties: compatible: @@ -185,7 +175,7 @@ examples: }; # VSC7512 (DSA) - | - ethernet-switch@1{ + ethernet-switch@1 { compatible = "mscc,vsc7512-switch"; reg = <0x71010000 0x10000>, <0x71030000 0x10000>, @@ -212,22 +202,22 @@ examples: "port7", "port8", "port9", "port10", "qsys", "ana", "s0", "s1", "s2"; - ethernet-ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - ethernet = <&mac_sw>; - phy-handle = <&phy0>; - phy-mode = "internal"; - }; - port@1 { - reg = <1>; - phy-handle = <&phy1>; - phy-mode = "internal"; - }; + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + ethernet = <&mac_sw>; + phy-handle = <&phy0>; + phy-mode = "internal"; + }; + port@1 { + reg = <1>; + phy-handle = <&phy1>; + phy-mode = "internal"; }; }; + }; ... 
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml index ab8867e693..85bfa45f51 100644 --- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml +++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml @@ -20,6 +20,7 @@ allOf: patternProperties: "^ethernet-phy@[0-9a-f]+$": type: object + additionalProperties: false description: | Some packages have multiple PHYs. Secondary PHY should be defines as subnode of the first (parent) PHY. diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml index 06b38c9bc6..29355ab985 100644 --- a/Documentation/devicetree/bindings/net/renesas,ether.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml @@ -81,9 +81,8 @@ properties: active-high patternProperties: - "^ethernet-phy@[0-9a-f]$": + "@[0-9a-f]$": type: object - $ref: ethernet-phy.yaml# required: - compatible diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 3f41294f59..5d074f27d4 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -109,9 +109,8 @@ properties: enum: [0, 2000] patternProperties: - "^ethernet-phy@[0-9a-f]$": + "@[0-9a-f]$": type: object - $ref: ethernet-phy.yaml# required: - compatible diff --git a/Documentation/devicetree/bindings/net/ti,cc1352p7.yaml b/Documentation/devicetree/bindings/net/ti,cc1352p7.yaml new file mode 100644 index 0000000000..3dde10de46 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ti,cc1352p7.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/ti,cc1352p7.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments Simplelink CC1352P7 wireless MCU + +description: + The 
CC1352P7 MCU can be connected via SPI or UART. + +maintainers: + - Ayush Singh + +properties: + compatible: + const: ti,cc1352p7 + + clocks: + items: + - description: high-frequency main system (MCU and peripherals) clock + - description: low-frequency system clock + + clock-names: + items: + - const: sclk_hf + - const: sclk_lf + + reset-gpios: + maxItems: 1 + + vdds-supply: true + +required: + - compatible + +additionalProperties: false + +examples: + - | + #include + + serial { + mcu { + compatible = "ti,cc1352p7"; + clocks = <&sclk_hf 0>, <&sclk_lf 25>; + clock-names = "sclk_hf", "sclk_lf"; + reset-gpios = <&pio 35 GPIO_ACTIVE_LOW>; + vdds-supply = <&vdds>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml index b04ac49666..f07ae3173b 100644 --- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml +++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml @@ -86,7 +86,7 @@ properties: const: 0 patternProperties: - "^port@[0-9]+$": + "^port@[12]$": type: object description: CPSW external ports diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml index 311c570165..229c8f3201 100644 --- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml +++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml @@ -19,6 +19,7 @@ allOf: properties: compatible: enum: + - ti,am642-icssg-prueth # for AM64x SoC family - ti,am654-icssg-prueth # for AM65x SoC family sram: @@ -106,6 +107,13 @@ properties: phandle to system controller node and register offset to ICSSG control register for RGMII transmit delay + ti,half-duplex-capable: + type: boolean + description: + Indicates that the PHY output pin COL is routed to ICSSG GPIO pin + (PRGx_PRU0/1_GPIO10) as input so that the ICSSG MII port is + capable of half duplex operations. 
+ required: - reg anyOf: diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml index 296001e7f4..4424c3c5e7 100644 --- a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml +++ b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: @@ -23,7 +24,9 @@ properties: - const: allwinner,sun20i-d1-sid - const: allwinner,sun50i-a64-sid - items: - - const: allwinner,sun50i-a100-sid + - enum: + - allwinner,sun50i-a100-sid + - allwinner,sun50i-h616-sid - const: allwinner,sun50i-a64-sid - const: allwinner,sun50i-h5-sid - const: allwinner,sun50i-h6-sid diff --git a/Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml b/Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml index e49c2754ff..9801fe6f91 100644 --- a/Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml b/Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml index 84b3dfd21e..b5cf740f96 100644 --- a/Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/apple,efuses.yaml b/Documentation/devicetree/bindings/nvmem/apple,efuses.yaml index e0860b6b85..d3abdafdbc 100644 --- a/Documentation/devicetree/bindings/nvmem/apple,efuses.yaml +++ 
b/Documentation/devicetree/bindings/nvmem/apple,efuses.yaml @@ -16,6 +16,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml index 99e60d713d..be1314454b 100644 --- a/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml @@ -16,6 +16,7 @@ description: | allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml b/Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml index 7ec2988b59..cf5f9e22bb 100644 --- a/Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml @@ -16,6 +16,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: $nodename: diff --git a/Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml b/Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml index a296d348ad..cc25f29276 100644 --- a/Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml +++ b/Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml @@ -16,6 +16,7 @@ description: | allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml index e436650f0f..d9287be898 100644 --- a/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/nvmem-deprecated-cells.yaml 
b/Documentation/devicetree/bindings/nvmem/nvmem-deprecated-cells.yaml new file mode 100644 index 0000000000..951af28bbf --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/nvmem-deprecated-cells.yaml @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/nvmem/nvmem-deprecated-cells.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVMEM old syntax for fixed cells + +maintainers: + - Srinivas Kandagatla + +description: | + Before introducing NVMEM layouts all NVMEM (fixed) cells were defined + as direct device subnodes. That syntax was replaced by "fixed-layout" + and is deprecated now. No new bindings should use it. + +patternProperties: + "@[0-9a-f]+(,[0-7])?$": + type: object + allOf: + - $ref: layouts/fixed-cell.yaml + - properties: + compatible: false + deprecated: true + +additionalProperties: true + +... diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index 9f921d9401..4fd015d402 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -46,15 +46,6 @@ properties: container may reference more advanced (dynamic) layout parsers. 
-patternProperties: - "@[0-9a-f]+(,[0-7])?$": - type: object - allOf: - - $ref: layouts/fixed-cell.yaml - - properties: - compatible: false - deprecated: true - additionalProperties: true examples: diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 8740938c32..8c8f05d9ea 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/qcom,sec-qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,sec-qfprom.yaml index 9b133f783d..2ada209994 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,sec-qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,sec-qfprom.yaml @@ -16,6 +16,7 @@ description: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml index cd980def97..068bedf5db 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml @@ -16,6 +16,7 @@ description: | allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index 9c6eff7889..a44d44b328 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -49,6 +49,7 @@ required: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# - if: properties: diff --git a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.yaml 
b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.yaml index c5403e1490..b80fd8d1ae 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml index efccc5aacb..e27cbae2d6 100644 --- a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml +++ b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/sunplus,sp7021-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/sunplus,sp7021-ocotp.yaml index da3f1de7d2..af97eeb831 100644 --- a/Documentation/devicetree/bindings/nvmem/sunplus,sp7021-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/sunplus,sp7021-ocotp.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: nvmem.yaml# + - $ref: nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/nvmem/u-boot,env.yaml b/Documentation/devicetree/bindings/nvmem/u-boot,env.yaml index 36d97fb878..9c36afc708 100644 --- a/Documentation/devicetree/bindings/nvmem/u-boot,env.yaml +++ b/Documentation/devicetree/bindings/nvmem/u-boot,env.yaml @@ -51,6 +51,8 @@ properties: ethaddr: type: object description: Ethernet interfaces base MAC address. + additionalProperties: false + properties: "#nvmem-cell-cells": description: The first argument is a MAC address offset. 
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml index bbbad31ae4..fd04d060c1 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml @@ -26,7 +26,9 @@ description: | properties: compatible: - const: operating-points-v2-kryo-cpu + enum: + - operating-points-v2-krait-cpu + - operating-points-v2-kryo-cpu nvmem-cells: description: | @@ -47,6 +49,8 @@ patternProperties: opp-microvolt: true + opp-peak-kBps: true + opp-supported-hw: description: | A single 32 bit bitmap value, representing compatible HW. @@ -63,14 +67,22 @@ patternProperties: 5: MSM8996SG, speedbin 1 6: MSM8996SG, speedbin 2 7-31: unused - enum: [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x9, 0xd, 0xe, 0xf, - 0x10, 0x20, 0x30, 0x70] + + Bitmap for IPQ806x SoC: + 0: IPQ8062 + 1: IPQ8064/IPQ8066/IPQ8068 + 2: IPQ8065/IPQ8069 + 3-31: unused + + Other platforms use bits directly corresponding to speedbin index. 
clock-latency-ns: true required-opps: true + patternProperties: + '^opp-microvolt-speed[0-9]+-pvs[0-9]+$': true + required: - opp-hz @@ -256,6 +268,22 @@ examples: }; }; + /* Dummy opp table to give example for named opp-microvolt */ + opp-table-2 { + compatible = "operating-points-v2-krait-cpu"; + nvmem-cells = <&speedbin_efuse>; + + opp-384000000 { + opp-hz = /bits/ 64 <384000000>; + opp-microvolt-speed0-pvs0 = <1000000 950000 1050000>; + opp-microvolt-speed0-pvs1 = <925000 878750 971250>; + opp-microvolt-speed0-pvs2 = <875000 831250 918750>; + opp-microvolt-speed0-pvs3 = <800000 760000 840000>; + opp-supported-hw = <0x7>; + clock-latency-ns = <100000>; + }; + }; + smem { compatible = "qcom,smem"; memory-region = <&smem_mem>; diff --git a/Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml b/Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml new file mode 100644 index 0000000000..fe38f62da0 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Renesas Electronics Corp. 
+%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/rcar-gen4-pci-ep.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas R-Car Gen4 PCIe Endpoint + +maintainers: + - Yoshihiro Shimoda + +allOf: + - $ref: snps,dw-pcie-ep.yaml# + +properties: + compatible: + items: + - const: renesas,r8a779f0-pcie-ep # R-Car S4-8 + - const: renesas,rcar-gen4-pcie-ep # R-Car Gen4 + + reg: + maxItems: 7 + + reg-names: + items: + - const: dbi + - const: dbi2 + - const: atu + - const: dma + - const: app + - const: phy + - const: addr_space + + interrupts: + maxItems: 3 + + interrupt-names: + items: + - const: dma + - const: sft_ce + - const: app + + clocks: + maxItems: 2 + + clock-names: + items: + - const: core + - const: ref + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: pwr + + max-link-speed: + maximum: 4 + + num-lanes: + maximum: 4 + + max-functions: + maximum: 2 + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - clock-names + - power-domains + - resets + - reset-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie0_ep: pcie-ep@e65d0000 { + compatible = "renesas,r8a779f0-pcie-ep", "renesas,rcar-gen4-pcie-ep"; + reg = <0 0xe65d0000 0 0x2000>, <0 0xe65d2000 0 0x1000>, + <0 0xe65d3000 0 0x2000>, <0 0xe65d5000 0 0x1200>, + <0 0xe65d6200 0 0x0e00>, <0 0xe65d7000 0 0x0400>, + <0 0xfe000000 0 0x400000>; + reg-names = "dbi", "dbi2", "atu", "dma", "app", "phy", "addr_space"; + interrupts = , + , + ; + interrupt-names = "dma", "sft_ce", "app"; + clocks = <&cpg CPG_MOD 624>, <&pcie0_clkref>; + clock-names = "core", "ref"; + power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>; + resets = <&cpg 624>; + reset-names = "pwr"; + max-link-speed = <4>; + num-lanes = <2>; + max-functions = /bits/ 8 <2>; + }; + }; diff --git 
a/Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml b/Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml new file mode 100644 index 0000000000..ffb34339b6 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Renesas Electronics Corp. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/rcar-gen4-pci-host.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas R-Car Gen4 PCIe Host + +maintainers: + - Yoshihiro Shimoda + +allOf: + - $ref: snps,dw-pcie.yaml# + +properties: + compatible: + items: + - const: renesas,r8a779f0-pcie # R-Car S4-8 + - const: renesas,rcar-gen4-pcie # R-Car Gen4 + + reg: + maxItems: 7 + + reg-names: + items: + - const: dbi + - const: dbi2 + - const: atu + - const: dma + - const: app + - const: phy + - const: config + + interrupts: + maxItems: 4 + + interrupt-names: + items: + - const: msi + - const: dma + - const: sft_ce + - const: app + + clocks: + maxItems: 2 + + clock-names: + items: + - const: core + - const: ref + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + reset-names: + items: + - const: pwr + + max-link-speed: + maximum: 4 + + num-lanes: + maximum: 4 + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - clock-names + - power-domains + - resets + - reset-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie: pcie@e65d0000 { + compatible = "renesas,r8a779f0-pcie", "renesas,rcar-gen4-pcie"; + reg = <0 0xe65d0000 0 0x1000>, <0 0xe65d2000 0 0x0800>, + <0 0xe65d3000 0 0x2000>, <0 0xe65d5000 0 0x1200>, + <0 0xe65d6200 0 0x0e00>, <0 0xe65d7000 0 0x0400>, + <0 0xfe000000 0 0x400000>; + reg-names = "dbi", "dbi2", "atu", "dma", "app", "phy", "config"; + interrupts = , + , + , + ; + interrupt-names = "msi", 
"dma", "sft_ce", "app"; + clocks = <&cpg CPG_MOD 624>, <&pcie0_clkref>; + clock-names = "core", "ref"; + power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>; + resets = <&cpg 624>; + reset-names = "pwr"; + max-link-speed = <4>; + num-lanes = <2>; + #address-cells = <3>; + #size-cells = <2>; + bus-range = <0x00 0xff>; + device_type = "pci"; + ranges = <0x01000000 0 0x00000000 0 0xfe000000 0 0x00400000>, + <0x02000000 0 0x30000000 0 0x30000000 0 0x10000000>; + dma-ranges = <0x42000000 0 0x00000000 0 0x00000000 1 0x00000000>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &gic GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>; + snps,enable-cdm-check; + }; + }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml index d87e134968..dc05761c5c 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -33,11 +33,11 @@ properties: specific for each activated function, while the rest of the sub-spaces are common for all of them (if there are more than one). minItems: 2 - maxItems: 6 + maxItems: 7 reg-names: minItems: 2 - maxItems: 6 + maxItems: 7 interrupts: description: diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index 8fc2151691..bbdb01d228 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -33,11 +33,11 @@ properties: normal controller functioning. iATU memory IO region is also required if the space is unrolled (IP-core version >= 4.80a). 
minItems: 2 - maxItems: 5 + maxItems: 7 reg-names: minItems: 2 - maxItems: 5 + maxItems: 7 items: oneOf: - description: diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 8bbdeb8821..022055edbf 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -42,11 +42,11 @@ properties: are required for the normal controller work. iATU memory IO region is also required if the space is unrolled (IP-core version >= 4.80a). minItems: 2 - maxItems: 5 + maxItems: 7 reg-names: minItems: 2 - maxItems: 5 + maxItems: 7 items: oneOf: - description: diff --git a/Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml b/Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml index 897602559b..426f90a47f 100644 --- a/Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml @@ -118,7 +118,7 @@ examples: compatible = "xlnx,nwl-pcie-2.11"; reg = <0x0 0xfd0e0000 0x0 0x1000>, <0x0 0xfd480000 0x0 0x1000>, - <0x80 0x00000000 0x0 0x1000000>; + <0x80 0x00000000 0x0 0x10000000>; reg-names = "breg", "pcireg", "cfg"; ranges = <0x02000000 0x0 0xe0000000 0x0 0xe0000000 0x0 0x10000000>, <0x43000000 0x00000006 0x0 0x00000006 0x0 0x00000002 0x0>; diff --git a/Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml b/Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml new file mode 100644 index 0000000000..0aa00b8e49 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/xlnx,xdma-host.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx XDMA PL PCIe Root Port Bridge + +maintainers: + - Thippeswamy Havalige + +allOf: + - $ref: /schemas/pci/pci-bus.yaml# + +properties: + compatible: + const: 
xlnx,xdma-host-3.00 + + reg: + maxItems: 1 + + ranges: + maxItems: 2 + + interrupts: + items: + - description: interrupt asserted when miscellaneous interrupt is received. + - description: msi0 interrupt asserted when an MSI is received. + - description: msi1 interrupt asserted when an MSI is received. + + interrupt-names: + items: + - const: misc + - const: msi0 + - const: msi1 + + interrupt-map-mask: + items: + - const: 0 + - const: 0 + - const: 0 + - const: 7 + + interrupt-map: + maxItems: 4 + + "#interrupt-cells": + const: 1 + + interrupt-controller: + description: identifies the node as an interrupt controller + type: object + properties: + interrupt-controller: true + + "#address-cells": + const: 0 + + "#interrupt-cells": + const: 1 + + required: + - interrupt-controller + - "#address-cells" + - "#interrupt-cells" + + additionalProperties: false + +required: + - compatible + - reg + - ranges + - interrupts + - interrupt-map + - interrupt-map-mask + - "#interrupt-cells" + - interrupt-controller + +unevaluatedProperties: false + +examples: + + - | + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + pcie@a0000000 { + compatible = "xlnx,xdma-host-3.00"; + reg = <0x0 0xa0000000 0x0 0x10000000>; + ranges = <0x2000000 0x0 0xb0000000 0x0 0xb0000000 0x0 0x1000000>, + <0x43000000 0x5 0x0 0x5 0x0 0x0 0x1000000>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + device_type = "pci"; + interrupt-parent = <&gic>; + interrupts = , , + ; + interrupt-names = "misc", "msi0", "msi1"; + interrupt-map-mask = <0x0 0x0 0x0 0x7>; + interrupt-map = <0 0 0 1 &pcie_intc_0 0>, + <0 0 0 2 &pcie_intc_0 1>, + <0 0 0 3 &pcie_intc_0 2>, + <0 0 0 4 &pcie_intc_0 3>; + pcie_intc_0: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml index c8448de2f2..d01c677ad3 
100644 --- a/Documentation/devicetree/bindings/perf/riscv,pmu.yaml +++ b/Documentation/devicetree/bindings/perf/riscv,pmu.yaml @@ -90,7 +90,7 @@ properties: bitmap of all MHPMCOUNTERx that can monitor the range of events dependencies: - "riscv,event-to-mhpmevent": [ "riscv,event-to-mhpmcounters" ] + riscv,event-to-mhpmevent: [ "riscv,event-to-mhpmcounters" ] required: - compatible diff --git a/Documentation/devicetree/bindings/phy/marvell,pxa1928-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,pxa1928-usb-phy.yaml new file mode 100644 index 0000000000..be33f036cd --- /dev/null +++ b/Documentation/devicetree/bindings/phy/marvell,pxa1928-usb-phy.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/marvell,pxa1928-usb-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell PXA1928 USB/HSIC PHY + +maintainers: + - Duje Mihanović + +properties: + compatible: + enum: + - marvell,pxa1928-usb-phy + - marvell,pxa1928-hsic-phy + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + '#phy-cells': + const: 0 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - '#phy-cells' + +additionalProperties: false + +examples: + - | + #include + + usbphy: phy@7000 { + compatible = "marvell,pxa1928-usb-phy"; + reg = <0x7000 0xe0>; + clocks = <&apmu PXA1928_CLK_USB>; + #phy-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml index a63b20dfa4..6703689fcd 100644 --- a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml +++ b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml @@ -30,6 +30,7 @@ properties: - const: mediatek,mt8173-mipi-tx - items: - enum: + - mediatek,mt8188-mipi-tx - mediatek,mt8365-mipi-tx - const: mediatek,mt8183-mipi-tx - const: mediatek,mt2701-mipi-tx diff --git 
a/Documentation/devicetree/bindings/phy/mediatek,mt7628-usbphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,mt7628-usbphy.yaml new file mode 100644 index 0000000000..ce2c228e32 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/mediatek,mt7628-usbphy.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/mediatek,mt7628-usbphy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mediatek/Ralink USB PHY + +maintainers: + - Sergio Paracuellos + +properties: + compatible: + enum: + - mediatek,mt7620-usbphy + - mediatek,mt7628-usbphy + - ralink,rt3352-usbphy + + reg: + maxItems: 1 + + "#phy-cells": + const: 0 + + ralink,sysctl: + description: + phandle to a ralink syscon register region. + $ref: /schemas/types.yaml#/definitions/phandle + + resets: + items: + - description: USB Host reset controller + - description: USB Device reset controller + + reset-names: + items: + - const: host + - const: device + +required: + - compatible + - "#phy-cells" + - ralink,sysctl + - resets + - reset-names + +allOf: + - if: + properties: + compatible: + contains: + const: mediatek,mt7628-usbphy + then: + required: + - reg + else: + properties: + reg: false + +additionalProperties: false + +examples: + - | + phy@10120000 { + compatible = "mediatek,mt7628-usbphy"; + reg = <0x10120000 0x1000>; + #phy-cells = <0>; + ralink,sysctl = <&sysc>; + resets = <&rstctrl 22>, + <&rstctrl 25>; + reset-names = "host", "device"; + }; + +... 
diff --git a/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt b/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt deleted file mode 100644 index 35f03df001..0000000000 --- a/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt +++ /dev/null @@ -1,24 +0,0 @@ -ST STiH407 USB PHY controller - -This file documents the dt bindings for the usb picoPHY driver which is the PHY for both USB2 and USB3 -host controllers (when controlling usb2/1.1 devices) available on STiH407 SoC family from STMicroelectronics. - -Required properties: -- compatible : should be "st,stih407-usb2-phy" -- st,syscfg : phandle of sysconfig bank plus integer array containing phyparam and phyctrl register offsets -- resets : list of phandle and reset specifier pairs. There should be two entries, one - for the whole phy and one for the port -- reset-names : list of reset signal names. Should be "global" and "port" -See: Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml -See: Documentation/devicetree/bindings/reset/reset.txt - -Example: - -usb2_picophy0: usbpicophy@f8 { - compatible = "st,stih407-usb2-phy"; - #phy-cells = <0>; - st,syscfg = <&syscfg_core 0x100 0xf4>; - resets = <&softreset STIH407_PICOPHY_SOFTRESET>, - <&picophyreset STIH407_PICOPHY0_RESET>; - reset-names = "global", "port"; -}; diff --git a/Documentation/devicetree/bindings/phy/pxa1928-usb-phy.txt b/Documentation/devicetree/bindings/phy/pxa1928-usb-phy.txt deleted file mode 100644 index da94426aa6..0000000000 --- a/Documentation/devicetree/bindings/phy/pxa1928-usb-phy.txt +++ /dev/null @@ -1,18 +0,0 @@ -* Marvell PXA1928 USB and HSIC PHYs - -Required properties: -- compatible: "marvell,pxa1928-usb-phy" or "marvell,pxa1928-hsic-phy" -- reg: base address and length of the registers -- clocks - A single clock. From common clock binding. -- #phys-cells: should be 0. From common phy binding. 
-- resets: reference to the reset controller - -Example: - - usbphy: phy@7000 { - compatible = "marvell,pxa1928-usb-phy"; - reg = <0x7000 0xe0>; - clocks = <&apmu_clocks PXA1928_CLK_USB>; - #phy-cells = <0>; - }; - diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq5332-usb-hsphy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq5332-usb-hsphy.yaml index 2671a048c9..e77576d06c 100644 --- a/Documentation/devicetree/bindings/phy/qcom,ipq5332-usb-hsphy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq5332-usb-hsphy.yaml @@ -17,7 +17,9 @@ description: properties: compatible: items: - - const: qcom,ipq5332-usb-hsphy + - enum: + - qcom,ipq5018-usb-hsphy + - qcom,ipq5332-usb-hsphy "#phy-cells": const: 0 diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml deleted file mode 100644 index 827109d370..0000000000 --- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml +++ /dev/null @@ -1,287 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-usb3-phy.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm QMP PHY controller (USB, MSM8996) - -maintainers: - - Vinod Koul - -description: - QMP PHY controller supports physical layer functionality for a number of - controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. - - Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see - qcom,sc8280xp-qmp-usb3-uni-phy.yaml. 
- -properties: - compatible: - enum: - - qcom,ipq6018-qmp-usb3-phy - - qcom,ipq8074-qmp-usb3-phy - - qcom,msm8996-qmp-usb3-phy - - qcom,msm8998-qmp-usb3-phy - - qcom,sdm845-qmp-usb3-uni-phy - - qcom,sdx55-qmp-usb3-uni-phy - - qcom,sdx65-qmp-usb3-uni-phy - - qcom,sm8150-qmp-usb3-uni-phy - - qcom,sm8250-qmp-usb3-uni-phy - - qcom,sm8350-qmp-usb3-uni-phy - - reg: - items: - - description: serdes - - "#address-cells": - enum: [ 1, 2 ] - - "#size-cells": - enum: [ 1, 2 ] - - ranges: true - - clocks: - minItems: 3 - maxItems: 4 - - clock-names: - minItems: 3 - maxItems: 4 - - power-domains: - maxItems: 1 - - resets: - maxItems: 2 - - reset-names: - maxItems: 2 - - vdda-phy-supply: true - - vdda-pll-supply: true - - vddp-ref-clk-supply: true - -patternProperties: - "^phy@[0-9a-f]+$": - type: object - description: single PHY-provider child node - properties: - reg: - minItems: 3 - maxItems: 6 - - clocks: - items: - - description: PIPE clock - - clock-names: - deprecated: true - items: - - const: pipe0 - - "#clock-cells": - const: 0 - - clock-output-names: - maxItems: 1 - - "#phy-cells": - const: 0 - - required: - - reg - - clocks - - "#clock-cells" - - clock-output-names - - "#phy-cells" - - additionalProperties: false - -required: - - compatible - - reg - - "#address-cells" - - "#size-cells" - - ranges - - clocks - - clock-names - - resets - - reset-names - - vdda-phy-supply - - vdda-pll-supply - -additionalProperties: false - -allOf: - - if: - properties: - compatible: - contains: - enum: - - qcom,sdm845-qmp-usb3-uni-phy - then: - properties: - clocks: - maxItems: 4 - clock-names: - items: - - const: aux - - const: cfg_ahb - - const: ref - - const: com_aux - resets: - maxItems: 2 - reset-names: - items: - - const: phy - - const: common - - - if: - properties: - compatible: - contains: - enum: - - qcom,ipq8074-qmp-usb3-phy - - qcom,msm8996-qmp-usb3-phy - - qcom,msm8998-qmp-usb3-phy - - qcom,sdx55-qmp-usb3-uni-phy - - qcom,sdx65-qmp-usb3-uni-phy - then: - properties: - 
clocks: - maxItems: 3 - clock-names: - items: - - const: aux - - const: cfg_ahb - - const: ref - resets: - maxItems: 2 - reset-names: - items: - - const: phy - - const: common - - - if: - properties: - compatible: - contains: - enum: - - qcom,sm8150-qmp-usb3-uni-phy - - qcom,sm8250-qmp-usb3-uni-phy - - qcom,sm8350-qmp-usb3-uni-phy - then: - properties: - clocks: - maxItems: 4 - clock-names: - items: - - const: aux - - const: ref_clk_src - - const: ref - - const: com_aux - resets: - maxItems: 2 - reset-names: - items: - - const: phy - - const: common - - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8998-qmp-usb3-phy - then: - patternProperties: - "^phy@[0-9a-f]+$": - properties: - reg: - items: - - description: TX lane 1 - - description: RX lane 1 - - description: PCS - - description: TX lane 2 - - description: RX lane 2 - - - if: - properties: - compatible: - contains: - enum: - - qcom,ipq6018-qmp-usb3-phy - - qcom,ipq8074-qmp-usb3-phy - - qcom,sdx55-qmp-usb3-uni-phy - - qcom,sdx65-qmp-usb3-uni-phy - - qcom,sm8150-qmp-usb3-uni-phy - then: - patternProperties: - "^phy@[0-9a-f]+$": - properties: - reg: - items: - - description: TX - - description: RX - - description: PCS - - description: PCS_MISC - - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8996-qmp-usb3-phy - - qcom,sm8250-qmp-usb3-uni-phy - - qcom,sm8350-qmp-usb3-uni-phy - then: - patternProperties: - "^phy@[0-9a-f]+$": - properties: - reg: - items: - - description: TX - - description: RX - - description: PCS - -examples: - - | - #include - usb_2_qmpphy: phy-wrapper@88eb000 { - compatible = "qcom,sdm845-qmp-usb3-uni-phy"; - reg = <0x088eb000 0x18c>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x088eb000 0x2000>; - - clocks = <&gcc GCC_USB3_SEC_PHY_AUX_CLK >, - <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>, - <&gcc GCC_USB3_SEC_CLKREF_CLK>, - <&gcc GCC_USB3_SEC_PHY_COM_AUX_CLK>; - clock-names = "aux", "cfg_ahb", "ref", "com_aux"; - - resets = <&gcc 
GCC_USB3PHY_PHY_SEC_BCR>, - <&gcc GCC_USB3_PHY_SEC_BCR>; - reset-names = "phy", "common"; - - vdda-phy-supply = <&vdda_usb2_ss_1p2>; - vdda-pll-supply = <&vdda_usb2_ss_core>; - - usb_2_ssphy: phy@200 { - reg = <0x200 0x128>, - <0x400 0x1fc>, - <0x800 0x218>, - <0x600 0x70>; - - clocks = <&gcc GCC_USB3_SEC_PHY_PIPE_CLK>; - - #clock-cells = <0>; - clock-output-names = "usb3_uni_phy_pipe_clk_src"; - - #phy-cells = <0>; - }; - }; diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml index d981d77e82..f3a3296c81 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -19,6 +19,7 @@ properties: - qcom,msm8996-qmp-ufs-phy - qcom,msm8998-qmp-ufs-phy - qcom,sa8775p-qmp-ufs-phy + - qcom,sc7280-qmp-ufs-phy - qcom,sc8180x-qmp-ufs-phy - qcom,sc8280xp-qmp-ufs-phy - qcom,sdm845-qmp-ufs-phy @@ -85,6 +86,7 @@ allOf: contains: enum: - qcom,sa8775p-qmp-ufs-phy + - qcom,sc7280-qmp-ufs-phy - qcom,sm8450-qmp-ufs-phy then: properties: diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml index f99fbbcd68..57702f7f2a 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml @@ -16,20 +16,34 @@ description: properties: compatible: enum: + - qcom,ipq6018-qmp-usb3-phy + - qcom,ipq8074-qmp-usb3-phy - qcom,ipq9574-qmp-usb3-phy + - qcom,msm8996-qmp-usb3-phy + - qcom,msm8998-qmp-usb3-phy - qcom,qcm2290-qmp-usb3-phy - qcom,sa8775p-qmp-usb3-uni-phy - qcom,sc8280xp-qmp-usb3-uni-phy + - qcom,sdm845-qmp-usb3-uni-phy + - qcom,sdx55-qmp-usb3-uni-phy + - qcom,sdx65-qmp-usb3-uni-phy + - qcom,sdx75-qmp-usb3-uni-phy - qcom,sm6115-qmp-usb3-phy + - qcom,sm8150-qmp-usb3-uni-phy + - 
qcom,sm8250-qmp-usb3-uni-phy + - qcom,sm8350-qmp-usb3-uni-phy + reg: maxItems: 1 clocks: - maxItems: 4 + minItems: 4 + maxItems: 5 clock-names: - maxItems: 4 + minItems: 4 + maxItems: 5 power-domains: maxItems: 1 @@ -74,9 +88,18 @@ allOf: compatible: contains: enum: + - qcom,ipq6018-qmp-usb3-phy + - qcom,ipq8074-qmp-usb3-phy - qcom,ipq9574-qmp-usb3-phy + - qcom,msm8996-qmp-usb3-phy + - qcom,msm8998-qmp-usb3-phy + - qcom,sdx55-qmp-usb3-uni-phy + - qcom,sdx65-qmp-usb3-uni-phy + - qcom,sdx75-qmp-usb3-uni-phy then: properties: + clocks: + maxItems: 4 clock-names: items: - const: aux @@ -109,6 +132,9 @@ allOf: enum: - qcom,sa8775p-qmp-usb3-uni-phy - qcom,sc8280xp-qmp-usb3-uni-phy + - qcom,sm8150-qmp-usb3-uni-phy + - qcom,sm8250-qmp-usb3-uni-phy + - qcom,sm8350-qmp-usb3-uni-phy then: properties: clocks: @@ -119,6 +145,33 @@ allOf: - const: ref - const: com_aux - const: pipe + + - if: + properties: + compatible: + contains: + enum: + - qcom,sdm845-qmp-usb3-uni-phy + then: + properties: + clocks: + maxItems: 5 + clock-names: + items: + - const: aux + - const: cfg_ahb + - const: ref + - const: com_aux + - const: pipe + + - if: + properties: + compatible: + contains: + enum: + - qcom,sa8775p-qmp-usb3-uni-phy + - qcom,sc8280xp-qmp-usb3-uni-phy + then: required: - power-domains diff --git a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml index c53bab107b..c95828607a 100644 --- a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml @@ -14,7 +14,12 @@ description: properties: compatible: - const: qcom,sm8550-snps-eusb2-phy + oneOf: + - items: + - enum: + - qcom,sdx75-snps-eusb2-phy + - const: qcom,sm8550-snps-eusb2-phy + - const: qcom,sm8550-snps-eusb2-phy reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/phy/ralink-usb-phy.txt b/Documentation/devicetree/bindings/phy/ralink-usb-phy.txt deleted file mode 
100644 index 9d2868a437..0000000000 --- a/Documentation/devicetree/bindings/phy/ralink-usb-phy.txt +++ /dev/null @@ -1,23 +0,0 @@ -Mediatek/Ralink USB PHY - -Required properties: - - compatible: "ralink,rt3352-usbphy" - "mediatek,mt7620-usbphy" - "mediatek,mt7628-usbphy" - - reg: required for "mediatek,mt7628-usbphy", unused otherwise - - #phy-cells: should be 0 - - ralink,sysctl: a phandle to a ralink syscon register region - - resets: the two reset controllers for host and device - - reset-names: the names of the 2 reset controllers - -Example: - -usbphy: phy { - compatible = "mediatek,mt7628-usbphy"; - reg = <0x10120000 0x1000>; - #phy-cells = <0>; - - ralink,sysctl = <&sysc>; - resets = <&rstctrl 22 &rstctrl 25>; - reset-names = "host", "device"; -}; diff --git a/Documentation/devicetree/bindings/phy/st,stih407-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/st,stih407-usb2-phy.yaml new file mode 100644 index 0000000000..e45cdd98aa --- /dev/null +++ b/Documentation/devicetree/bindings/phy/st,stih407-usb2-phy.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/st,stih407-usb2-phy.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: STMicroelectronics STiH407 USB PHY controller + +maintainers: + - Patrice Chotard + +description: + The USB picoPHY device is the PHY for both USB2 and USB3 host controllers + (when controlling usb2/1.1 devices) available on STiH407 SoC family from + STMicroelectronics. + +properties: + compatible: + const: st,stih407-usb2-phy + + st,syscfg: + description: Phandle to the syscfg bank + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - items: + - description: phandle to syscfg + - description: phyparam register offset + - description: phyctrl register offset + + resets: + items: + - description: Phandle and reset specifier pair for the whole phy. + - description: Phandle and reset specifier pair for the port. 
+ + reset-names: + items: + - const: global + - const: port + + "#phy-cells": + const: 0 + +required: + - compatible + - st,syscfg + - resets + - reset-names + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + usb-phy { + compatible = "st,stih407-usb2-phy"; + #phy-cells = <0>; + st,syscfg = <&syscfg_core 0x100 0xf4>; + resets = <&softreset STIH407_PICOPHY_SOFTRESET>, + <&picophyreset STIH407_PICOPHY0_RESET>; + reset-names = "global", "port"; + }; +... diff --git a/Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml b/Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml index 4e7a456ea4..c7df4cd341 100644 --- a/Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml +++ b/Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - amlogic,c3-periphs-pinctrl + - amlogic,t7-periphs-pinctrl - amlogic,meson-a1-periphs-pinctrl - amlogic,meson-s4-periphs-pinctrl diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6318-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6318-pinctrl.yaml index 4478a76171..62890a0aea 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6318-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6318-pinctrl.yaml @@ -24,6 +24,7 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + additionalProperties: false properties: function: @@ -37,6 +38,10 @@ patternProperties: enum: [ gpio0, gpio1, gpio2, gpio3, gpio4, gpio5, gpio6, gpio7, gpio8, gpio9, gpio10, gpio11, gpio12, gpio13, gpio40 ] + patternProperties: + '-pins$': + $ref: '#/patternProperties/-pins$' + allOf: - $ref: pinctrl.yaml# diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm63268-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm63268-pinctrl.yaml index 73e1caa7c0..7cc0e16508 100644 --- 
a/Documentation/devicetree/bindings/pinctrl/brcm,bcm63268-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm63268-pinctrl.yaml @@ -24,6 +24,7 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + unevaluatedProperties: false properties: function: @@ -36,11 +37,15 @@ patternProperties: pins: enum: [ gpio0, gpio1, gpio16, gpio17, gpio8, gpio9, gpio18, gpio19, - gpio22, gpio23, gpio30, gpio31, nand_grp, gpio35 + gpio22, gpio23, gpio30, gpio31, nand_grp, gpio35, dectpd_grp, vdsl_phy_override_0_grp, vdsl_phy_override_1_grp, vdsl_phy_override_2_grp, vdsl_phy_override_3_grp, dsl_gpio8, dsl_gpio9 ] + patternProperties: + '-pins$': + $ref: '#/patternProperties/-pins$' + allOf: - $ref: pinctrl.yaml# @@ -122,46 +127,46 @@ examples: pinctrl_nand: nand-pins { function = "nand"; - group = "nand_grp"; + pins = "nand_grp"; }; pinctrl_gpio35_alt: gpio35_alt-pins { function = "gpio35_alt"; - pin = "gpio35"; + pins = "gpio35"; }; pinctrl_dectpd: dectpd-pins { function = "dectpd"; - group = "dectpd_grp"; + pins = "dectpd_grp"; }; pinctrl_vdsl_phy_override_0: vdsl_phy_override_0-pins { function = "vdsl_phy_override_0"; - group = "vdsl_phy_override_0_grp"; + pins = "vdsl_phy_override_0_grp"; }; pinctrl_vdsl_phy_override_1: vdsl_phy_override_1-pins { function = "vdsl_phy_override_1"; - group = "vdsl_phy_override_1_grp"; + pins = "vdsl_phy_override_1_grp"; }; pinctrl_vdsl_phy_override_2: vdsl_phy_override_2-pins { function = "vdsl_phy_override_2"; - group = "vdsl_phy_override_2_grp"; + pins = "vdsl_phy_override_2_grp"; }; pinctrl_vdsl_phy_override_3: vdsl_phy_override_3-pins { function = "vdsl_phy_override_3"; - group = "vdsl_phy_override_3_grp"; + pins = "vdsl_phy_override_3_grp"; }; pinctrl_dsl_gpio8: dsl_gpio8-pins { function = "dsl_gpio8"; - group = "dsl_gpio8"; + pins = "dsl_gpio8"; }; pinctrl_dsl_gpio9: dsl_gpio9-pins { function = "dsl_gpio9"; - group = "dsl_gpio9"; + pins = "dsl_gpio9"; }; }; diff --git 
a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6328-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6328-pinctrl.yaml index 2750ba42ae..f57bb34c31 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6328-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6328-pinctrl.yaml @@ -24,6 +24,7 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + unevaluatedProperties: false properties: function: @@ -36,6 +37,10 @@ patternProperties: gpio20, gpio25, gpio26, gpio27, gpio28, hsspi_cs1, usb_port1 ] + patternProperties: + '-pins$': + $ref: '#/patternProperties/-pins$' + allOf: - $ref: pinctrl.yaml# diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6358-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6358-pinctrl.yaml index 2f6c540498..ce6fc5380c 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6358-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6358-pinctrl.yaml @@ -24,15 +24,16 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + unevaluatedProperties: false properties: function: enum: [ ebi_cs, uart1, serial_led, legacy_led, led, spi_cs, utopia, pwm_syn_clk, sys_irq ] - pins: + groups: enum: [ ebi_cs_grp, uart1_grp, serial_led_grp, legacy_led_grp, - led_grp, spi_cs_grp, utopia_grp, pwm_syn_clk, sys_irq_grp ] + led_grp, spi_cs_grp, utopia_grp, pwm_syn_clk_grp, sys_irq_grp ] allOf: - $ref: pinctrl.yaml# diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6362-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6362-pinctrl.yaml index b3044f8057..5f7ed7d3dd 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6362-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6362-pinctrl.yaml @@ -24,6 +24,7 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + unevaluatedProperties: false properties: function: @@ -41,6 +42,10 @@ patternProperties: gpio15, gpio16, gpio17, 
gpio18, gpio19, gpio20, gpio21, gpio22, gpio23, gpio24, gpio25, gpio26, gpio27, nand_grp ] + patternProperties: + '-pins$': + $ref: '#/patternProperties/-pins$' + allOf: - $ref: pinctrl.yaml# @@ -204,6 +209,6 @@ examples: pinctrl_nand: nand-pins { function = "nand"; - group = "nand_grp"; + pins = "nand_grp"; }; }; diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6368-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6368-pinctrl.yaml index 3236871827..d549e94550 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,bcm6368-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm6368-pinctrl.yaml @@ -24,6 +24,7 @@ patternProperties: '-pins$': type: object $ref: pinmux-node.yaml# + unevaluatedProperties: false properties: function: @@ -42,6 +43,10 @@ patternProperties: gpio24, gpio25, gpio26, gpio27, gpio28, gpio29, gpio30, gpio31, uart1_grp ] + patternProperties: + '-pins$': + $ref: '#/patternProperties/-pins$' + allOf: - $ref: pinctrl.yaml# @@ -215,6 +220,6 @@ examples: pinctrl_uart1: uart1-pins { function = "uart1"; - group = "uart1_grp"; + pins = "uart1_grp"; }; }; diff --git a/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml new file mode 100644 index 0000000000..3e84728988 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml @@ -0,0 +1,217 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/nuvoton,npcm845-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton NPCM845 Pin Controller and GPIO + +maintainers: + - Tomer Maimon + +description: + The Nuvoton BMC NPCM8XX Pin Controller multi-function routed through + the multiplexing block, Each pin supports GPIO functionality (GPIOx) + and multiple functions that directly connect the pin to different + hardware blocks. 
+ +properties: + compatible: + const: nuvoton,npcm845-pinctrl + + ranges: + maxItems: 1 + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + + nuvoton,sysgcr: + $ref: /schemas/types.yaml#/definitions/phandle + description: a phandle to access GCR registers. + +patternProperties: + '^gpio@': + type: object + additionalProperties: false + + description: + Eight GPIO banks that each contain 32 GPIOs. + + properties: + gpio-controller: true + + '#gpio-cells': + const: 2 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + gpio-ranges: + maxItems: 1 + + required: + - gpio-controller + - '#gpio-cells' + - reg + - interrupts + - gpio-ranges + + '-mux$': + $ref: pinmux-node.yaml# + + properties: + groups: + description: + One or more groups of pins to mux to a certain function + items: + enum: [ iox1, iox2, smb1d, smb2d, lkgpo1, lkgpo2, ioxh, gspi, + smb5b, smb5c, lkgpo0, pspi, jm1, jm2, smb4den, smb4b, + smb4c, smb15, smb16, smb17, smb18, smb19, smb20, smb21, + smb22, smb23, smb23b, smb4d, smb14, smb5, smb4, smb3, + spi0cs1, spi0cs2, spi0cs3, spi1cs0, spi1cs1, spi1cs2, + spi1cs3, spi1cs23, smb3c, smb3b, bmcuart0a, uart1, jtag2, + bmcuart1, uart2, sg1mdio, bmcuart0b, r1err, r1md, r1oen, + r2oen, rmii3, r3oen, smb3d, fanin0, fanin1, fanin2, fanin3, + fanin4, fanin5, fanin6, fanin7, fanin8, fanin9, fanin10, + fanin11, fanin12, fanin13, fanin14, fanin15, pwm0, pwm1, pwm2, + pwm3, r2, r2err, r2md, r3rxer, ga20kbc, smb5d, lpc, espi, rg2, + ddr, i3c0, i3c1, i3c2, i3c3, i3c4, i3c5, smb0, smb1, smb2, + smb2c, smb2b, smb1c, smb1b, smb8, smb9, smb10, smb11, sd1, + sd1pwr, pwm4, pwm5, pwm6, pwm7, pwm8, pwm9, pwm10, pwm11, + mmc8, mmc, mmcwp, mmccd, mmcrst, clkout, serirq, lpcclk, + scipme, smi, smb6, smb7, spi1, faninx, r1, spi3, spi3cs1, + spi3quad, spi3cs2, spi3cs3, nprd_smi, smb0b, smb0c, smb0den, + smb0d, ddc, rg2mdio, wdog1, wdog2, smb12, smb13, spix, + spixcs1, clkreq, hgpio0, hgpio1, hgpio2, hgpio3, hgpio4, + hgpio5, hgpio6, hgpio7 ] + + function: + 
description: + The function that a group of pins is muxed to + enum: [ iox1, iox2, smb1d, smb2d, lkgpo1, lkgpo2, ioxh, gspi, + smb5b, smb5c, lkgpo0, pspi, jm1, jm2, smb4den, smb4b, + smb4c, smb15, smb16, smb17, smb18, smb19, smb20, smb21, + smb22, smb23, smb23b, smb4d, smb14, smb5, smb4, smb3, + spi0cs1, spi0cs2, spi0cs3, spi1cs0, spi1cs1, spi1cs2, + spi1cs3, spi1cs23, smb3c, smb3b, bmcuart0a, uart1, jtag2, + bmcuart1, uart2, sg1mdio, bmcuart0b, r1err, r1md, r1oen, + r2oen, rmii3, r3oen, smb3d, fanin0, fanin1, fanin2, fanin3, + fanin4, fanin5, fanin6, fanin7, fanin8, fanin9, fanin10, + fanin11, fanin12, fanin13, fanin14, fanin15, pwm0, pwm1, pwm2, + pwm3, r2, r2err, r2md, r3rxer, ga20kbc, smb5d, lpc, espi, rg2, + ddr, i3c0, i3c1, i3c2, i3c3, i3c4, i3c5, smb0, smb1, smb2, + smb2c, smb2b, smb1c, smb1b, smb8, smb9, smb10, smb11, sd1, + sd1pwr, pwm4, pwm5, pwm6, pwm7, pwm8, pwm9, pwm10, pwm11, + mmc8, mmc, mmcwp, mmccd, mmcrst, clkout, serirq, lpcclk, + scipme, smi, smb6, smb7, spi1, faninx, r1, spi3, spi3cs1, + spi3quad, spi3cs2, spi3cs3, nprd_smi, smb0b, smb0c, smb0den, + smb0d, ddc, rg2mdio, wdog1, wdog2, smb12, smb13, spix, + spixcs1, clkreq, hgpio0, hgpio1, hgpio2, hgpio3, hgpio4, + hgpio5, hgpio6, hgpio7 ] + + dependencies: + groups: [ function ] + function: [ groups ] + + additionalProperties: false + + '^pin': + $ref: pincfg-node.yaml# + + properties: + pins: + description: + A list of pins to configure in certain ways, such as enabling + debouncing + items: + pattern: '^GPIO([0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])' + + bias-disable: true + + bias-pull-up: true + + bias-pull-down: true + + input-enable: true + + output-low: true + + output-high: true + + drive-push-pull: true + + drive-open-drain: true + + input-debounce: + description: + Debouncing periods in microseconds, one period per interrupt + bank found in the controller + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 4 + + slew-rate: + description: | + 0: Low 
rate + 1: High rate + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + + drive-strength: + enum: [ 0, 1, 2, 4, 8, 12 ] + + additionalProperties: false + +allOf: + - $ref: pinctrl.yaml# + +required: + - compatible + - ranges + - '#address-cells' + - '#size-cells' + - nuvoton,sysgcr + +additionalProperties: false + +examples: + - | + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pinctrl: pinctrl@f0010000 { + compatible = "nuvoton,npcm845-pinctrl"; + ranges = <0x0 0x0 0xf0010000 0x8000>; + #address-cells = <1>; + #size-cells = <1>; + nuvoton,sysgcr = <&gcr>; + + gpio0: gpio@0 { + gpio-controller; + #gpio-cells = <2>; + reg = <0x0 0xb0>; + interrupts = ; + gpio-ranges = <&pinctrl 0 0 32>; + }; + + fanin0_pin: fanin0-mux { + groups = "fanin0"; + function = "fanin0"; + }; + + pin34_slew: pin34-slew { + pins = "GPIO34/I3C4_SDA"; + bias-disable; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml index d49aafd8c5..a24286e4de 100644 --- a/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/nxp,s32g2-siul2-pinctrl.yaml @@ -9,7 +9,7 @@ title: NXP S32G2 pin controller maintainers: - Ghennadi Procopciuc - - Chester Lin + - Chester Lin description: | S32G2 pinmux is implemented in SIUL2 (System Integration Unit Lite2), diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index 2aedb7e7bc..5ece3b9d67 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -43,7 +43,8 @@ patternProperties: "-state$": oneOf: - $ref: "#/$defs/qcom-mdm9607-tlmm-state" - - patternProperties: + - additionalProperties: false + patternProperties: ".*": $ref: 
"#/$defs/qcom-mdm9607-tlmm-state" diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8226-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,msm8226-pinctrl.yaml index 9efb765095..a602bf0d27 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8226-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8226-pinctrl.yaml @@ -67,8 +67,8 @@ $defs: Specify the alternative function to be configured for the specified pins. Functions are only valid for gpio pins. enum: [ gpio, cci_i2c0, blsp_uim1, blsp_uim2, blsp_uim3, blsp_uim5, - blsp_i2c1, blsp_i2c2, blsp_i2c3, blsp_i2c4, blsp_i2c5, blsp_spi1, - blsp_spi2, blsp_spi3, blsp_spi5, blsp_uart1, blsp_uart2, + blsp_i2c1, blsp_i2c2, blsp_i2c3, blsp_i2c4, blsp_i2c5, blsp_i2c6, + blsp_spi1, blsp_spi2, blsp_spi3, blsp_spi5, blsp_uart1, blsp_uart2, blsp_uart3, blsp_uart4, blsp_uart5, cam_mclk0, cam_mclk1, gp0_clk, gp1_clk, sdc3, wlan ] diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml index e119a226a4..2173c52556 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml @@ -28,6 +28,7 @@ properties: gpio-controller: true "#gpio-cells": true gpio-ranges: true + wakeup-parent: true gpio-reserved-ranges: minItems: 1 diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sc7280-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sc7280-pinctrl.yaml index 368d44ff54..c8735ab97e 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sc7280-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sc7280-pinctrl.yaml @@ -41,6 +41,10 @@ properties: gpio-ranges: maxItems: 1 + gpio-reserved-ranges: + minItems: 1 + maxItems: 88 + gpio-line-names: maxItems: 175 diff --git a/Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml 
b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml new file mode 100644 index 0000000000..fc6c65fea7 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml @@ -0,0 +1,188 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2023 Realtek Semiconductor Corporation +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/realtek,rtd1315e-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek DHC RTD1315E Pin Controller + +maintainers: + - TY Chang + +description: + The Realtek DHC RTD1315E is a high-definition media processor SoC. The + RTD1315E pin controller is used to control pin function, pull up/down + resistor, drive strength, schmitt trigger and power source. + +properties: + compatible: + const: realtek,rtd1315e-pinctrl + + reg: + maxItems: 1 + +patternProperties: + '-pins$': + type: object + allOf: + - $ref: pincfg-node.yaml# + - $ref: pinmux-node.yaml# + + properties: + pins: + items: + enum: [ gpio_0, gpio_1, emmc_rst_n, emmc_dd_sb, emmc_clk, emmc_cmd, + gpio_6, gpio_7, gpio_8, gpio_9, gpio_10, gpio_11, gpio_12, + gpio_13, gpio_14, gpio_15, gpio_16, gpio_17, gpio_18, gpio_19, + gpio_20, emmc_data_0, emmc_data_1, emmc_data_2, usb_cc2, gpio_25, + gpio_26, gpio_27, gpio_28, gpio_29, gpio_30, gpio_31, gpio_32, + gpio_33, gpio_34, gpio_35, hif_data, hif_en, hif_rdy, hif_clk, + gpio_dummy_40, gpio_dummy_41, gpio_dummy_42, gpio_dummy_43, + gpio_dummy_44, gpio_dummy_45, gpio_46, gpio_47, gpio_48, gpio_49, + gpio_50, usb_cc1, emmc_data_3, emmc_data_4, ir_rx, ur0_rx, ur0_tx, + gpio_57, gpio_58, gpio_59, gpio_60, gpio_61, gpio_62, gpio_dummy_63, + gpio_dummy_64, gpio_dummy_65, gpio_66, gpio_67, gpio_68, gpio_69, + gpio_70, gpio_71, gpio_72, gpio_dummy_73, emmc_data_5, emmc_data_6, + emmc_data_7, gpio_dummy_77, gpio_78, gpio_79, gpio_80, gpio_81, + ur2_loc, gspi_loc, hi_width, sf_en, arm_trace_dbg_en, + ejtag_aucpu_loc, ejtag_acpu_loc, ejtag_vcpu_loc, 
ejtag_scpu_loc, + dmic_loc, vtc_dmic_loc, vtc_tdm_loc, vtc_i2si_loc, tdm_ai_loc, + ai_loc, spdif_loc, hif_en_loc, scan_switch, wd_rset, boot_sel, + reset_n, testmode ] + + function: + enum: [ gpio, nf, emmc, ao, gspi_loc0, gspi_loc1, uart0, uart1, + uart2_loc0, uart2_loc1, i2c0, i2c1, i2c4, i2c5, pcie1, + etn_led, etn_phy, spi, pwm0_loc0, pwm0_loc1, pwm1_loc0, + pwm1_loc1, pwm2_loc0, pwm2_loc1, pwm3_loc0, pwm3_loc1, + spdif_optical_loc0, spdif_optical_loc1, usb_cc1, usb_cc2, + sd, dmic_loc0, dmic_loc1, ai_loc0, ai_loc1, tdm_ai_loc0, + tdm_ai_loc1, hi_loc0, hi_m, vtc_i2so, vtc_i2si_loc0, + vtc_i2si_loc1, vtc_dmic_loc0, vtc_dmic_loc1, vtc_tdm_loc0, + vtc_tdm_loc1, dc_fan, pll_test_loc0, pll_test_loc1, + ir_rx, uart2_disable, gspi_disable, hi_width_disable, + hi_width_1bit, sf_disable, sf_enable, scpu_ejtag_loc0, + scpu_ejtag_loc1, scpu_ejtag_loc2, scpu_ejtag_loc3, + acpu_ejtag_loc0, acpu_ejtag_loc1, acpu_ejtag_loc2, + vcpu_ejtag_loc0, vcpu_ejtag_loc1, vcpu_ejtag_loc2, + aucpu_ejtag_loc0, aucpu_ejtag_loc1, aucpu_ejtag_loc2, + gpu_ejtag, iso_tristate, dbg_out0, dbg_out1, standby_dbg, + spdif, arm_trace_debug_disable, arm_trace_debug_enable, + aucpu_ejtag_disable, acpu_ejtag_disable, vcpu_ejtag_disable, + scpu_ejtag_disable, vtc_dmic_loc_disable, vtc_tdm_disable, + vtc_i2si_disable, tdm_ai_disable, ai_disable, spdif_disable, + hif_disable, hif_enable, test_loop, pmic_pwrup ] + + drive-strength: + enum: [4, 8] + + bias-pull-down: true + + bias-pull-up: true + + bias-disable: true + + input-schmitt-enable: true + + input-schmitt-disable: true + + drive-push-pull: true + + power-source: + description: | + Valid arguments are described as below: + 0: power supply of 1.8V + 1: power supply of 3.3V + enum: [0, 1] + + realtek,drive-strength-p: + description: | + Some of pins can be driven using the P-MOS and N-MOS transistor to + achieve finer adjustments. 
The block-diagram representation is as + follows: + VDD + | + ||--+ + +-----o|| P-MOS-FET + | ||--+ + IN --+ +----- out + | ||--+ + +------|| N-MOS-FET + ||--+ + | + GND + The driving strength of the P-MOS/N-MOS transistors impacts the + waveform's rise/fall times. Greater driving strength results in + shorter rise/fall times. Each P-MOS and N-MOS transistor offers + 8 configurable levels (0 to 7), with higher values indicating + greater driving strength, contributing to achieving the desired + speed. + + The realtek,drive-strength-p is used to control the driving strength + of the P-MOS output. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,drive-strength-n: + description: | + Similar to the realtek,drive-strength-p, the realtek,drive-strength-n + is used to control the driving strength of the N-MOS output. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,duty-cycle: + description: | + An integer describing the level to adjust output duty cycle, controlling + the proportion of positive and negative waveforms in nanoseconds. 
+ Valid arguments are described as below: + 0: 0ns + 2: + 0.25ns + 3: + 0.5ns + 4: -0.25ns + 5: -0.5ns + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 2, 3, 4, 5 ] + + required: + - pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pinctrl@4e000 { + compatible = "realtek,rtd1315e-pinctrl"; + reg = <0x4e000 0x130>; + + emmc-hs200-pins { + pins = "emmc_clk", + "emmc_cmd", + "emmc_data_0", + "emmc_data_1", + "emmc_data_2", + "emmc_data_3", + "emmc_data_4", + "emmc_data_5", + "emmc_data_6", + "emmc_data_7"; + function = "emmc"; + realtek,drive-strength-p = <0x2>; + realtek,drive-strength-n = <0x2>; + }; + + i2c-0-pins { + pins = "gpio_12", + "gpio_13"; + function = "i2c0"; + drive-strength = <4>; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml new file mode 100644 index 0000000000..f07361d60a --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml @@ -0,0 +1,187 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2023 Realtek Semiconductor Corporation +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/realtek,rtd1319d-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek DHC RTD1319D Pin Controller + +maintainers: + - TY Chang + +description: + The Realtek DHC RTD1319D is a high-definition media processor SoC. The + RTD1319D pin controller is used to control pin function, pull up/down + resistor, drive strength, schmitt trigger and power source. 
+ +properties: + compatible: + const: realtek,rtd1319d-pinctrl + + reg: + maxItems: 1 + +patternProperties: + '-pins$': + type: object + allOf: + - $ref: pincfg-node.yaml# + - $ref: pinmux-node.yaml# + + properties: + pins: + items: + enum: [ gpio_0, gpio_1, gpio_2, gpio_3, gpio_4, gpio_5, gpio_6, gpio_7, + gpio_8, gpio_9, gpio_10, gpio_11, gpio_12, gpio_13, gpio_14, + gpio_15, gpio_16, gpio_17, gpio_18, gpio_19, gpio_20, gpio_21, + gpio_22, gpio_23, usb_cc2, gpio_25, gpio_26, gpio_27, gpio_28, + gpio_29, gpio_30, gpio_31, gpio_32, gpio_33, gpio_34, gpio_35, + hif_data, hif_en, hif_rdy, hif_clk, gpio_40, gpio_41, gpio_42, + gpio_43, gpio_44, gpio_45, gpio_46, gpio_47, gpio_48, gpio_49, + gpio_50, usb_cc1, gpio_52, gpio_53, ir_rx, ur0_rx, ur0_tx, + gpio_57, gpio_58, gpio_59, gpio_60, gpio_61, gpio_62, gpio_63, + gpio_64, emmc_rst_n, emmc_dd_sb, emmc_clk, emmc_cmd, emmc_data_0, + emmc_data_1, emmc_data_2, emmc_data_3, emmc_data_4, emmc_data_5, + emmc_data_6, emmc_data_7, dummy, gpio_78, gpio_79, gpio_80, + gpio_81, ur2_loc, gspi_loc, hi_width, sf_en, arm_trace_dbg_en, + ejtag_aucpu_loc, ejtag_acpu_loc, ejtag_vcpu_loc, ejtag_scpu_loc, + dmic_loc, ejtag_secpu_loc, vtc_dmic_loc, vtc_tdm_loc, vtc_i2si_loc, + tdm_ai_loc, ai_loc, spdif_loc, hif_en_loc, sc0_loc, sc1_loc, + scan_switch, wd_rset, boot_sel, reset_n, testmode ] + + function: + enum: [ gpio, nf, emmc, tp0, tp1, sc0, sc0_data0, sc0_data1, sc0_data2, + sc1, sc1_data0, sc1_data1, sc1_data2, ao, gspi_loc0, gspi_loc1, + uart0, uart1, uart2_loc0, uart2_loc1, i2c0, i2c1, i2c3, i2c4, + i2c5, pcie1, sdio, etn_led, etn_phy, spi, pwm0_loc0, pwm0_loc1, + pwm1_loc0, pwm1_loc1, pwm2_loc0, pwm2_loc1, pwm3_loc0, pwm3_loc1, + qam_agc_if0, qam_agc_if1, spdif_optical_loc0, spdif_optical_loc1, + usb_cc1, usb_cc2, vfd, sd, dmic_loc0, dmic_loc1, ai_loc0, ai_loc1, + tdm_ai_loc0, tdm_ai_loc1, hi_loc0, hi_m, vtc_i2so, vtc_i2si_loc0, + vtc_i2si_loc1, vtc_dmic_loc0, vtc_dmic_loc1, vtc_tdm_loc0, + vtc_tdm_loc1, dc_fan, pll_test_loc0, 
pll_test_loc1, ir_rx, + uart2_disable, gspi_disable, hi_width_disable, hi_width_1bit, + sf_disable, sf_enable, scpu_ejtag_loc0, scpu_ejtag_loc1, + scpu_ejtag_loc2, acpu_ejtag_loc0, acpu_ejtag_loc1, acpu_ejtag_loc2, + vcpu_ejtag_loc0, vcpu_ejtag_loc1, vcpu_ejtag_loc2, secpu_ejtag_loc0, + secpu_ejtag_loc1, secpu_ejtag_loc2, aucpu_ejtag_loc0, aucpu_ejtag_loc1, + aucpu_ejtag_loc2, iso_tristate, dbg_out0, dbg_out1, standby_dbg, + spdif, arm_trace_debug_disable, arm_trace_debug_enable, + aucpu_ejtag_disable, acpu_ejtag_disable, vcpu_ejtag_disable, + scpu_ejtag_disable, secpu_ejtag_disable, vtc_dmic_loc_disable, + vtc_tdm_disable, vtc_i2si_disable, tdm_ai_disable, ai_disable, + spdif_disable, hif_disable, hif_enable, test_loop, pmic_pwrup ] + + drive-strength: + enum: [4, 8] + + bias-pull-down: true + + bias-pull-up: true + + bias-disable: true + + input-schmitt-enable: true + + input-schmitt-disable: true + + drive-push-pull: true + + power-source: + description: | + Valid arguments are described as below: + 0: power supply of 1.8V + 1: power supply of 3.3V + enum: [0, 1] + + realtek,drive-strength-p: + description: | + Some of pins can be driven using the P-MOS and N-MOS transistor to + achieve finer adjustments. The block-diagram representation is as + follows: + VDD + | + ||--+ + +-----o|| P-MOS-FET + | ||--+ + IN --+ +----- out + | ||--+ + +------|| N-MOS-FET + ||--+ + | + GND + The driving strength of the P-MOS/N-MOS transistors impacts the + waveform's rise/fall times. Greater driving strength results in + shorter rise/fall times. Each P-MOS and N-MOS transistor offers + 8 configurable levels (0 to 7), with higher values indicating + greater driving strength, contributing to achieving the desired + speed. + + The realtek,drive-strength-p is used to control the driving strength + of the P-MOS output. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,drive-strength-n: + description: | + Similar to the realtek,drive-strength-p, the realtek,drive-strength-n + is used to control the driving strength of the N-MOS output. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,duty-cycle: + description: | + An integer describing the level to adjust output duty cycle, controlling + the proportion of positive and negative waveforms in nanoseconds. + Valid arguments are described as below: + 0: 0ns + 2: + 0.25ns + 3: + 0.5ns + 4: -0.25ns + 5: -0.5ns + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 2, 3, 4, 5 ] + + required: + - pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pinctrl@4e000 { + compatible = "realtek,rtd1319d-pinctrl"; + reg = <0x4e000 0x130>; + + emmc-hs200-pins { + pins = "emmc_clk", + "emmc_cmd", + "emmc_data_0", + "emmc_data_1", + "emmc_data_2", + "emmc_data_3", + "emmc_data_4", + "emmc_data_5", + "emmc_data_6", + "emmc_data_7"; + function = "emmc"; + realtek,drive-strength-p = <0x2>; + realtek,drive-strength-n = <0x2>; + }; + + i2c-0-pins { + pins = "gpio_12", + "gpio_13"; + function = "i2c0"; + drive-strength = <4>; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml new file mode 100644 index 0000000000..671e4ec846 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2023 Realtek Semiconductor Corporation +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/realtek,rtd1619b-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek DHC RTD1619B Pin Controller + +maintainers: + - TY Chang + +description: + The Realtek DHC 
RTD1619B is a high-definition media processor SoC. The + RTD1619B pin controller is used to control pin function, pull up/down + resistor, drive strength, schmitt trigger and power source. + +properties: + compatible: + const: realtek,rtd1619b-pinctrl + + reg: + maxItems: 1 + +patternProperties: + '-pins$': + type: object + allOf: + - $ref: pincfg-node.yaml# + - $ref: pinmux-node.yaml# + + properties: + pins: + items: + enum: [ gpio_0, gpio_1, gpio_2, gpio_3, gpio_4, gpio_5, gpio_6, gpio_7, + gpio_8, gpio_9, gpio_10, gpio_11, gpio_12, gpio_13, gpio_14, + gpio_15, gpio_16, gpio_17, gpio_18, gpio_19, gpio_20, gpio_21, + gpio_22, gpio_23, usb_cc2, gpio_25, gpio_26, gpio_27, gpio_28, + gpio_29, gpio_30, gpio_31, gpio_32, gpio_33, gpio_34, gpio_35, + hif_data, hif_en, hif_rdy, hif_clk, gpio_40, gpio_41, gpio_42, + gpio_43, gpio_44, gpio_45, gpio_46, gpio_47, gpio_48, gpio_49, + gpio_50, usb_cc1, gpio_52, gpio_53, ir_rx, ur0_rx, ur0_tx, + gpio_57, gpio_58, gpio_59, gpio_60, gpio_61, gpio_62, gpio_63, + gpio_64, gpio_65, gpio_66, gpio_67, gpio_68, gpio_69, gpio_70, + gpio_71, gpio_72, gpio_73, gpio_74, gpio_75, gpio_76, emmc_cmd, + spi_ce_n, spi_sck, spi_so, spi_si, emmc_rst_n, emmc_dd_sb, + emmc_clk, emmc_data_0, emmc_data_1, emmc_data_2, emmc_data_3, + emmc_data_4, emmc_data_5, emmc_data_6, emmc_data_7, ur2_loc, + gspi_loc, sdio_loc, hi_loc, hi_width, sf_en, arm_trace_dbg_en, + pwm_01_open_drain_en_loc0, pwm_23_open_drain_en_loc0, + pwm_01_open_drain_en_loc1, pwm_23_open_drain_en_loc1, + ejtag_acpu_loc, ejtag_vcpu_loc, ejtag_scpu_loc, dmic_loc, + iso_gspi_loc, ejtag_ve3_loc, ejtag_aucpu0_loc, ejtag_aucpu1_loc ] + + function: + enum: [ gpio, nf, nf_spi, spi, pmic, spdif, spdif_coaxial, spdif_optical_loc0, + spdif_optical_loc1, emmc_spi, emmc, sc1, uart0, uart1, uart2_loc0, uart2_loc1, + gspi_loc1, iso_gspi_loc1, i2c0, i2c1, i2c3, i2c4, i2c5, pwm0, pwm1, pwm2, + pwm3, etn_led, etn_phy, etn_clk, sc0, vfd, gspi_loc0, iso_gspi_loc0, pcie1, + pcie2, sd, sdio_loc0, sdio_loc1, 
hi, hi_m, dc_fan, pll_test_loc0, pll_test_loc1, + usb_cc1, usb_cc2, ir_rx, tdm_ai_loc0, tdm_ai_loc1, dmic_loc0, dmic_loc1, + ai_loc0, ai_loc1, tp0, tp1, ao, uart2_disable, gspi_disable, sdio_disable, + hi_loc_disable, hi_loc0, hi_width_disable, hi_width_1bit, vtc_i2si_loc0, + vtc_tdm_loc0, vtc_dmic_loc0, vtc_i2si_loc1, vtc_tdm_loc1, vtc_dmic_loc1, + vtc_i2so, ve3_ejtag_loc0, aucpu0_ejtag_loc0, aucpu1_ejtag_loc0, ve3_ejtag_loc1, + aucpu0_ejtag_loc1, aucpu1_ejtag_loc1, ve3_ejtag_loc2, aucpu0_ejtag_loc2, + aucpu1_ejtag_loc2, scpu_ejtag_loc0, acpu_ejtag_loc0, vcpu_ejtag_loc0, + scpu_ejtag_loc1, acpu_ejtag_loc1, vcpu_ejtag_loc1, scpu_ejtag_loc2, + acpu_ejtag_loc2, vcpu_ejtag_loc2, ve3_ejtag_disable, aucpu0_ejtag_disable, + aucpu1_ejtag_disable, acpu_ejtag_disable, vcpu_ejtag_disable, + scpu_ejtag_disable, iso_gspi_disable, sf_disable, sf_enable, + arm_trace_debug_disable, arm_trace_debug_enable, pwm_normal, pwm_open_drain, + standby_dbg, test_loop_dis ] + + drive-strength: + enum: [4, 8] + + bias-pull-down: true + + bias-pull-up: true + + bias-disable: true + + input-schmitt-enable: true + + input-schmitt-disable: true + + drive-push-pull: true + + power-source: + description: | + Valid arguments are described as below: + 0: power supply of 1.8V + 1: power supply of 3.3V + enum: [0, 1] + + realtek,drive-strength-p: + description: | + Some of pins can be driven using the P-MOS and N-MOS transistor to + achieve finer adjustments. The block-diagram representation is as + follows: + VDD + | + ||--+ + +-----o|| P-MOS-FET + | ||--+ + IN --+ +----- out + | ||--+ + +------|| N-MOS-FET + ||--+ + | + GND + The driving strength of the P-MOS/N-MOS transistors impacts the + waveform's rise/fall times. Greater driving strength results in + shorter rise/fall times. Each P-MOS and N-MOS transistor offers + 8 configurable levels (0 to 7), with higher values indicating + greater driving strength, contributing to achieving the desired + speed. 
+ + The realtek,drive-strength-p is used to control the driving strength + of the P-MOS output. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,drive-strength-n: + description: | + Similar to the realtek,drive-strength-p, the realtek,drive-strength-n + is used to control the driving strength of the N-MOS output. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + realtek,duty-cycle: + description: | + An integer describing the level to adjust output duty cycle, controlling + the proportion of positive and negative waveforms in nanoseconds. + Valid arguments are described as below: + 0: 0ns + 2: + 0.25ns + 3: + 0.5ns + 4: -0.25ns + 5: -0.5ns + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 2, 3, 4, 5 ] + + required: + - pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pinctrl@4e000 { + compatible = "realtek,rtd1619b-pinctrl"; + reg = <0x4e000 0x130>; + + emmc-hs200-pins { + pins = "emmc_clk", + "emmc_cmd", + "emmc_data_0", + "emmc_data_1", + "emmc_data_2", + "emmc_data_3", + "emmc_data_4", + "emmc_data_5", + "emmc_data_6", + "emmc_data_7"; + function = "emmc"; + realtek,drive-strength-p = <0x2>; + realtek,drive-strength-n = <0x2>; + }; + + i2c-0-pins { + pins = "gpio_12", + "gpio_13"; + function = "i2c0"; + drive-strength = <4>; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml index 145c5442f2..b5ca40d0e2 100644 --- a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml @@ -25,6 +25,7 @@ properties: - enum: - renesas,r9a07g043-pinctrl # RZ/G2UL{Type-1,Type-2} and RZ/Five - renesas,r9a07g044-pinctrl # RZ/G2{L,LC} + - renesas,r9a08g045-pinctrl # RZ/G3S - items: - enum: @@ -73,10 +74,26 @@ properties: 
additionalProperties: anyOf: - type: object + additionalProperties: false allOf: - $ref: pincfg-node.yaml# - $ref: pinmux-node.yaml# + - if: + properties: + compatible: + contains: + enum: + - renesas,r9a08g045-pinctrl + then: + properties: + drive-strength: false + output-impedance-ohms: false + slew-rate: false + else: + properties: + drive-strength-microamp: false + description: Pin controller client devices use pin configuration subnodes (children and grandchildren) for desired pin configuration. @@ -91,6 +108,10 @@ additionalProperties: pins: true drive-strength: enum: [ 2, 4, 8, 12 ] + drive-strength-microamp: + enum: [ 1900, 2200, 4000, 4400, 4500, 4700, 5200, 5300, 5700, + 5800, 6000, 6050, 6100, 6550, 6800, 7000, 8000, 9000, + 10000 ] output-impedance-ohms: enum: [ 33, 50, 66, 100 ] power-source: diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml index cb81a17bd0..5fa5d31f88 100644 --- a/Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml @@ -53,6 +53,7 @@ properties: additionalProperties: anyOf: - type: object + additionalProperties: false allOf: - $ref: pincfg-node.yaml# - $ref: pinmux-node.yaml# diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml index 10c335efe6..20e806dce1 100644 --- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml @@ -115,6 +115,8 @@ additionalProperties: type: object additionalProperties: type: object + additionalProperties: false + properties: rockchip,pins: $ref: /schemas/types.yaml#/definitions/uint32-matrix diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 
2120ef71a7..e1eb45a9ed 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -48,7 +48,8 @@ properties: description: Phandle+args to the syscon node which includes IRQ mux selection. $ref: /schemas/types.yaml#/definitions/phandle-array items: - - items: + - minItems: 2 + items: - description: syscon node which includes IRQ mux selection - description: The offset of the IRQ mux selection register - description: The field mask of IRQ mux, needed if different of 0xf diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml index d80bbedfe3..dab3d92bc2 100644 --- a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml +++ b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml @@ -12,7 +12,7 @@ maintainers: - Jianxin Pan description: |+ - Secure Power Domains used in Meson A1/C1/S4 & C3 SoCs, and should be the child node + Secure Power Domains used in Meson A1/C1/S4 & C3/T7 SoCs, and should be the child node of secure-monitor. properties: @@ -21,6 +21,7 @@ properties: - amlogic,meson-a1-pwrc - amlogic,meson-s4-pwrc - amlogic,c3-pwrc + - amlogic,t7-pwrc "#power-domain-cells": const: 1 diff --git a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml index c9acef80f4..8985e2df8a 100644 --- a/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml +++ b/Documentation/devicetree/bindings/power/mediatek,power-controller.yaml @@ -31,6 +31,7 @@ properties: - mediatek,mt8188-power-controller - mediatek,mt8192-power-controller - mediatek,mt8195-power-controller + - mediatek,mt8365-power-controller '#power-domain-cells': const: 1 @@ -88,6 +89,7 @@ $defs: "include/dt-bindings/power/mediatek,mt8188-power.h" - for MT8188 type power domain. 
"include/dt-bindings/power/mt8192-power.h" - for MT8192 type power domain. "include/dt-bindings/power/mt8195-power.h" - for MT8195 type power domain. + "include/dt-bindings/power/mediatek,mt8365-power.h" - for MT8365 type power domain. maxItems: 1 clocks: @@ -115,6 +117,10 @@ $defs: $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the INFRACFG register range. + mediatek,infracfg-nao: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to the device containing the INFRACFG-NAO register range. + mediatek,smi: $ref: /schemas/types.yaml#/definitions/phandle description: phandle to the device containing the SMI register range. diff --git a/Documentation/devicetree/bindings/power/power-domain.yaml b/Documentation/devicetree/bindings/power/power-domain.yaml index d1235e5620..8fdb529d56 100644 --- a/Documentation/devicetree/bindings/power/power-domain.yaml +++ b/Documentation/devicetree/bindings/power/power-domain.yaml @@ -13,8 +13,9 @@ maintainers: description: |+ System on chip designs are often divided into multiple PM domains that can be - used for power gating of selected IP blocks for power saving by reduced leakage - current. + used for power gating of selected IP blocks for power saving by reduced + leakage current. Moreover, in some cases the similar PM domains may also be + capable of scaling performance for a group of IP blocks. This device tree binding can be used to bind PM domain consumer devices with their PM domains provided by PM domain providers. A PM domain provider can be @@ -25,7 +26,7 @@ description: |+ properties: $nodename: - pattern: "^(power-controller|power-domain)([@-].*)?$" + pattern: "^(power-controller|power-domain|performance-domain)([@-].*)?$" domain-idle-states: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -44,11 +45,11 @@ properties: operating-points-v2: description: - Phandles to the OPP tables of power domains provided by a power domain - provider. 
If the provider provides a single power domain only or all - the power domains provided by the provider have identical OPP tables, - then this shall contain a single phandle. Refer to ../opp/opp-v2-base.yaml - for more information. + Phandles to the OPP tables of power domains that are capable of scaling + performance, provided by a power domain provider. If the provider provides + a single power domain only or all the power domains provided by the + provider have identical OPP tables, then this shall contain a single + phandle. Refer to ../opp/opp-v2-base.yaml for more information. "#power-domain-cells": description: diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml index 9b03c41d36..da9c5846f4 100644 --- a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml +++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml @@ -15,42 +15,52 @@ description: properties: compatible: - enum: - - qcom,mdm9607-rpmpd - - qcom,msm8226-rpmpd - - qcom,msm8909-rpmpd - - qcom,msm8916-rpmpd - - qcom,msm8939-rpmpd - - qcom,msm8953-rpmpd - - qcom,msm8976-rpmpd - - qcom,msm8994-rpmpd - - qcom,msm8996-rpmpd - - qcom,msm8998-rpmpd - - qcom,qcm2290-rpmpd - - qcom,qcs404-rpmpd - - qcom,qdu1000-rpmhpd - - qcom,sa8155p-rpmhpd - - qcom,sa8540p-rpmhpd - - qcom,sa8775p-rpmhpd - - qcom,sdm660-rpmpd - - qcom,sc7180-rpmhpd - - qcom,sc7280-rpmhpd - - qcom,sc8180x-rpmhpd - - qcom,sc8280xp-rpmhpd - - qcom,sdm670-rpmhpd - - qcom,sdm845-rpmhpd - - qcom,sdx55-rpmhpd - - qcom,sdx65-rpmhpd - - qcom,sdx75-rpmhpd - - qcom,sm6115-rpmpd - - qcom,sm6125-rpmpd - - qcom,sm6350-rpmhpd - - qcom,sm6375-rpmpd - - qcom,sm8150-rpmhpd - - qcom,sm8250-rpmhpd - - qcom,sm8350-rpmhpd - - qcom,sm8450-rpmhpd - - qcom,sm8550-rpmhpd + oneOf: + - enum: + - qcom,mdm9607-rpmpd + - qcom,msm8226-rpmpd + - qcom,msm8909-rpmpd + - qcom,msm8916-rpmpd + - qcom,msm8917-rpmpd + - qcom,msm8939-rpmpd + - qcom,msm8953-rpmpd + - qcom,msm8976-rpmpd + - 
qcom,msm8994-rpmpd + - qcom,msm8996-rpmpd + - qcom,msm8998-rpmpd + - qcom,qcm2290-rpmpd + - qcom,qcs404-rpmpd + - qcom,qdu1000-rpmhpd + - qcom,qm215-rpmpd + - qcom,sa8155p-rpmhpd + - qcom,sa8540p-rpmhpd + - qcom,sa8775p-rpmhpd + - qcom,sc7180-rpmhpd + - qcom,sc7280-rpmhpd + - qcom,sc8180x-rpmhpd + - qcom,sc8280xp-rpmhpd + - qcom,sc8380xp-rpmhpd + - qcom,sdm660-rpmpd + - qcom,sdm670-rpmhpd + - qcom,sdm845-rpmhpd + - qcom,sdx55-rpmhpd + - qcom,sdx65-rpmhpd + - qcom,sdx75-rpmhpd + - qcom,sm6115-rpmpd + - qcom,sm6125-rpmpd + - qcom,sm6350-rpmhpd + - qcom,sm6375-rpmpd + - qcom,sm7150-rpmhpd + - qcom,sm8150-rpmhpd + - qcom,sm8250-rpmhpd + - qcom,sm8350-rpmhpd + - qcom,sm8450-rpmhpd + - qcom,sm8550-rpmhpd + - qcom,sm8650-rpmhpd + - items: + - enum: + - qcom,msm8937-rpmpd + - const: qcom,msm8917-rpmpd '#power-domain-cells': const: 1 diff --git a/Documentation/devicetree/bindings/power/reset/gpio-poweroff.yaml b/Documentation/devicetree/bindings/power/reset/gpio-poweroff.yaml index b54ec003a1..a4b437fce3 100644 --- a/Documentation/devicetree/bindings/power/reset/gpio-poweroff.yaml +++ b/Documentation/devicetree/bindings/power/reset/gpio-poweroff.yaml @@ -18,6 +18,9 @@ description: > Finally the operating system assumes the power off failed if the system is still running after waiting some time (timeout-ms). +allOf: + - $ref: restart-handler.yaml# + properties: compatible: const: gpio-poweroff @@ -40,6 +43,9 @@ properties: default: 100 description: Delay to wait after driving gpio inactive + priority: + default: 0 + timeout-ms: default: 3000 description: Time to wait before assuming the power off sequence failed. 
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml index 3412fe7e1e..d342b113fc 100644 --- a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml +++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.yaml @@ -15,6 +15,9 @@ description: |+ defined by the register map pointed by syscon reference plus the offset with the value and mask defined in the poweroff node. Default will be little endian mode, 32 bit access only. + The SYSCON register map is normally retrieved from the parental dt-node. So + the SYSCON poweroff node should be represented as a sub-node of a "syscon", + "simple-mfd" node. properties: compatible: @@ -30,7 +33,10 @@ properties: regmap: $ref: /schemas/types.yaml#/definitions/phandle - description: Phandle to the register map node. + deprecated: true + description: + Phandle to the register map node. This property is deprecated in favor of + the syscon-poweroff node being a child of a system controller node. 
value: $ref: /schemas/types.yaml#/definitions/uint32 @@ -38,7 +44,6 @@ properties: required: - compatible - - regmap - offset additionalProperties: false @@ -56,7 +61,6 @@ examples: - | poweroff { compatible = "syscon-poweroff"; - regmap = <®mapnode>; offset = <0x0>; mask = <0x7a>; }; diff --git a/Documentation/devicetree/bindings/power/supply/max8925_battery.txt b/Documentation/devicetree/bindings/power/supply/max8925_battery.txt deleted file mode 100644 index d7e3e0c0f7..0000000000 --- a/Documentation/devicetree/bindings/power/supply/max8925_battery.txt +++ /dev/null @@ -1,18 +0,0 @@ -max8925-battery bindings -~~~~~~~~~~~~~~~~ - -Optional properties : - - batt-detect: whether support battery detect - - topoff-threshold: set charging current in topoff mode - - fast-charge: set charging current in fast mode - - no-temp-support: whether support temperature protection detect - - no-insert-detect: whether support insert detect - -Example: - charger { - batt-detect = <0>; - topoff-threshold = <1>; - fast-charge = <7>; - no-temp-support = <0>; - no-insert-detect = <0>; - }; diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml index 2627cd3eed..377cbb2c2c 100644 --- a/Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml +++ b/Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml @@ -55,6 +55,14 @@ properties: interrupts: maxItems: 1 + io-channels: + items: + - description: battery temperature + + io-channel-names: + items: + - const: temp + wakeup-source: type: boolean description: | @@ -95,3 +103,26 @@ examples: wakeup-source; }; }; + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + fuel-gauge@36 { + compatible = "maxim,max17043"; + reg = <0x36>; + + interrupt-parent = <&gpio>; + interrupts = <144 IRQ_TYPE_EDGE_FALLING>; + + monitored-battery = <&battery>; + power-supplies = <&charger>; + + io-channels = <&adc 8>; + 
io-channel-names = "temp"; + + maxim,alert-low-soc-level = <10>; + wakeup-source; + }; + }; diff --git a/Documentation/devicetree/bindings/power/supply/mitsumi,mm8013.yaml b/Documentation/devicetree/bindings/power/supply/mitsumi,mm8013.yaml new file mode 100644 index 0000000000..6865640cbd --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/mitsumi,mm8013.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/supply/mitsumi,mm8013.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mitsumi MM8013 fuel gauge + +maintainers: + - Konrad Dybcio + +allOf: + - $ref: power-supply.yaml# + +properties: + compatible: + const: mitsumi,mm8013 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + fuel-gauge@55 { + compatible = "mitsumi,mm8013"; + reg = <0x55>; + }; + }; diff --git a/Documentation/devicetree/bindings/power/supply/qcom,pm8916-bms-vm.yaml b/Documentation/devicetree/bindings/power/supply/qcom,pm8916-bms-vm.yaml new file mode 100644 index 0000000000..ad764e69ab --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/qcom,pm8916-bms-vm.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/supply/qcom,pm8916-bms-vm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Voltage Mode BMS + +maintainers: + - Nikita Travkin + +description: + Voltage Mode BMS is a hardware block found in some Qualcomm PMICs + such as pm8916. This block performs battery voltage monitoring. 
+ +allOf: + - $ref: power-supply.yaml# + +properties: + compatible: + const: qcom,pm8916-bms-vm + + reg: + maxItems: 1 + + interrupts: + items: + - description: BMS FSM left S3 mode + - description: BMS FSM entered S2 mode + - description: OCV measured in S3 mode + - description: OCV below threshold + - description: FIFO update done + - description: BMS FSM switched state + + interrupt-names: + items: + - const: cv_leave + - const: cv_enter + - const: ocv_good + - const: ocv_thr + - const: fifo + - const: state_chg + + monitored-battery: true + + power-supplies: true + +required: + - compatible + - reg + - interrupts + - interrupt-names + - monitored-battery + +additionalProperties: false + +examples: + - | + #include + pmic { + #address-cells = <1>; + #size-cells = <0>; + + battery@4000 { + compatible = "qcom,pm8916-bms-vm"; + reg = <0x4000>; + interrupts = <0x0 0x40 0 IRQ_TYPE_EDGE_RISING>, + <0x0 0x40 1 IRQ_TYPE_EDGE_RISING>, + <0x0 0x40 2 IRQ_TYPE_EDGE_RISING>, + <0x0 0x40 3 IRQ_TYPE_EDGE_RISING>, + <0x0 0x40 4 IRQ_TYPE_EDGE_RISING>, + <0x0 0x40 5 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "cv_leave", + "cv_enter", + "ocv_good", + "ocv_thr", + "fifo", + "state_chg"; + + monitored-battery = <&battery>; + power-supplies = <&pm8916_charger>; + }; + }; diff --git a/Documentation/devicetree/bindings/power/supply/qcom,pm8916-lbc.yaml b/Documentation/devicetree/bindings/power/supply/qcom,pm8916-lbc.yaml new file mode 100644 index 0000000000..cdf14e5ed1 --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/qcom,pm8916-lbc.yaml @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/supply/qcom,pm8916-lbc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Linear Battery Charger + +maintainers: + - Nikita Travkin + +description: + Linear Battery Charger hardware block, found in some Qualcomm PMICs + such as pm8916. 
Implements a simple, autonomous CC/CV charger. + +allOf: + - $ref: power-supply.yaml# + +properties: + compatible: + const: qcom,pm8916-lbc + + reg: + items: + - description: Charger + - description: Battery + - description: USB + - description: MISC + + reg-names: + items: + - const: chgr + - const: bat_if + - const: usb + - const: misc + + interrupts: + items: + - description: Battery detection + - description: Fast charging + - description: Charging failed + - description: Charging done + - description: Battery present + - description: Battery temperature OK + - description: USB coarse detection + - description: USB IN valid + - description: Charger gone + - description: Overtemperature + + interrupt-names: + items: + - const: vbat_det + - const: fast_chg + - const: chg_fail + - const: chg_done + - const: bat_pres + - const: temp_ok + - const: coarse_det + - const: usb_vbus + - const: chg_gone + - const: overtemp + + qcom,fast-charge-safe-voltage: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 4000000 + maximum: 4775000 + description: + Maximum safe battery voltage in uV; May be pre-set by bootloader, + in which case, setting this will harmlessly fail. + + qcom,fast-charge-safe-current: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 90000 + maximum: 1440000 + description: + Maximum safe battery charge current in uA; May be pre-set by + bootloader, in which case setting this will harmlessly fail. 
+ + monitored-battery: true + +required: + - compatible + - reg + - interrupts + - interrupt-names + - qcom,fast-charge-safe-voltage + - qcom,fast-charge-safe-current + +additionalProperties: false + +examples: + - | + #include + pmic { + #address-cells = <1>; + #size-cells = <0>; + + charger@1000 { + compatible = "qcom,pm8916-lbc"; + reg = <0x1000>, <0x1200>, <0x1300>, <0x1600>; + reg-names = "chgr", "bat_if", "usb", "misc"; + + interrupts = <0x0 0x10 0 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x10 5 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x10 6 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x10 7 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x12 0 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x12 1 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x13 0 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x13 1 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x13 2 IRQ_TYPE_EDGE_BOTH>, + <0x0 0x13 4 IRQ_TYPE_EDGE_BOTH>; + interrupt-names = "vbat_det", + "fast_chg", + "chg_fail", + "chg_done", + "bat_pres", + "temp_ok", + "coarse_det", + "usb_vbus", + "chg_gone", + "overtemp"; + monitored-battery = <&battery>; + + qcom,fast-charge-safe-current = <900000>; + qcom,fast-charge-safe-voltage = <4300000>; + }; + }; diff --git a/Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.yaml b/Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.yaml index f255f3858d..2e21846463 100644 --- a/Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.yaml +++ b/Documentation/devicetree/bindings/power/supply/sbs,sbs-manager.yaml @@ -47,6 +47,12 @@ patternProperties: "^i2c@[1-4]$": type: object $ref: /schemas/i2c/i2c-controller.yaml# + unevaluatedProperties: false + + properties: + reg: + minimum: 1 + maximum: 4 examples: - | diff --git a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-battery.txt b/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-battery.txt deleted file mode 100644 index ee125cb0e4..0000000000 --- a/Documentation/devicetree/bindings/power/supply/stericsson,ab8500-battery.txt +++ /dev/null @@ -1,34 +0,0 @@ -AB85000 PMIC contains a node, which 
contains shared -information about the battery connected to the PMIC. -The node has no compatible property. - -Properties of this node are: - -thermistor-on-batctrl: - A boolean value indicating thermistor interface to battery - - Note: - 'btemp' and 'batctrl' are the pins interfaced for battery temperature - measurement, 'btemp' signal is used when NTC(negative temperature - coefficient) resister is interfaced external to battery whereas - 'batctrl' pin is used when NTC resister is internal to battery. - - Example: - ab8500_battery: ab8500_battery { - thermistor-on-batctrl; - }; - indicates: NTC resister is internal to battery, 'batctrl' is used - for thermal measurement. - - The absence of property 'thermal-on-batctrl' indicates - NTC resister is external to battery and 'btemp' signal is used - for thermal measurement. - -battery-type: - This shall be the battery manufacturing technology type, - allowed types are: - "UNKNOWN" "NiMH" "LION" "LIPO" "LiFe" "NiCd" "LiMn" - Example: - ab8500_battery: ab8500_battery { - stericsson,battery-type = "LIPO"; - } diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml index c01dff3b7f..a84a240a61 100644 --- a/Documentation/devicetree/bindings/pwm/imx-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/imx-pwm.yaml @@ -14,12 +14,10 @@ allOf: properties: "#pwm-cells": - description: | - Should be 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.yaml - in this directory for a description of the cells format. - enum: - - 2 - - 3 + description: + The only third cell flag supported by this binding is + PWM_POLARITY_INVERTED. fsl,imx1-pwm does not support this flags. 
+ const: 3 compatible: oneOf: diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml index 6ffbed204c..8f50e23ca8 100644 --- a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml @@ -15,12 +15,19 @@ allOf: properties: compatible: - enum: - - fsl,imx23-pwm + oneOf: + - const: fsl,imx23-pwm + - items: + - enum: + - fsl,imx28-pwm + - const: fsl,imx23-pwm reg: maxItems: 1 + clocks: + maxItems: 1 + "#pwm-cells": const: 3 @@ -31,6 +38,7 @@ properties: required: - compatible - reg + - clocks - fsl,pwm-number additionalProperties: false @@ -40,6 +48,7 @@ examples: pwm@80064000 { compatible = "fsl,imx23-pwm"; reg = <0x80064000 0x2000>; + clocks = <&clks 30>; #pwm-cells = <3>; fsl,pwm-number = <8>; }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml index f2d1dc7e7b..65bfb492b3 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml +++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml @@ -32,6 +32,7 @@ properties: - rockchip,rk3308-pwm - rockchip,rk3568-pwm - rockchip,rk3588-pwm + - rockchip,rv1126-pwm - const: rockchip,rk3328-pwm reg: diff --git a/Documentation/devicetree/bindings/regulator/adi,max77503-regulator.yaml b/Documentation/devicetree/bindings/regulator/adi,max77503-regulator.yaml new file mode 100644 index 0000000000..aa581e550b --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/adi,max77503-regulator.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright (c) 2023 Analog Devices, Inc. 
+%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/adi,max77503-regulator.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Analog Devices MAX77503 Buck Converter + +maintainers: + - Gokhan Celik + +description: | + The Analog Devices MAX77503 is a single channel 14V input, 1.5A + high-efficiency buck converter. This converter has 94% efficiency + for 2-Cell/3-Cell battery applications. + +allOf: + - $ref: regulator.yaml# + +properties: + compatible: + enum: + - adi,max77503 + + reg: + description: I2C address of the device + items: + - enum: [0x1e, 0x24, 0x37] + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + regulator@1e { + compatible = "adi,max77503"; + reg = <0x1e>; + + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <5000000>; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/da9210.txt b/Documentation/devicetree/bindings/regulator/da9210.txt deleted file mode 100644 index 58065ca9e3..0000000000 --- a/Documentation/devicetree/bindings/regulator/da9210.txt +++ /dev/null @@ -1,29 +0,0 @@ -* Dialog Semiconductor DA9210 Multi-phase 12A DCDC BUCK Converter - -Required properties: - -- compatible: must be "dlg,da9210" -- reg: the i2c slave address of the regulator. It should be 0x68. - -Optional properties: - -- interrupts: a reference to the DA9210 interrupt, if available. - -Any standard regulator properties can be used to configure the single da9210 -DCDC. 
- -Example: - - da9210@68 { - compatible = "dlg,da9210"; - reg = <0x68>; - - interrupt-parent = <...>; - interrupts = <...>; - - regulator-min-microvolt = <300000>; - regulator-max-microvolt = <1570000>; - regulator-min-microamp = <1600000>; - regulator-max-microamp = <4600000>; - regulator-boot-on; - }; diff --git a/Documentation/devicetree/bindings/regulator/dlg,da9210.yaml b/Documentation/devicetree/bindings/regulator/dlg,da9210.yaml new file mode 100644 index 0000000000..81f23de36d --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/dlg,da9210.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/dlg,da9210.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Dialog Semiconductor DA9210 Multi-Phase 12A DC-DC Buck Converter + +maintainers: + - Support Opensource + +allOf: + - $ref: regulator.yaml# + +properties: + compatible: + const: dlg,da9210 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + regulator@68 { + compatible = "dlg,da9210"; + reg = <0x68>; + + interrupt-parent = <&irqc0>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + + regulator-min-microvolt = <300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <1600000>; + regulator-max-microamp = <4600000>; + regulator-boot-on; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index ac0281b1cc..ce7751b912 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -100,6 +100,11 @@ properties: vin-supply: description: Input supply phandle. 
+ interrupts: + maxItems: 1 + description: + Interrupt signaling a critical under-voltage event. + required: - compatible - regulator-name diff --git a/Documentation/devicetree/bindings/regulator/maxim,max20086.yaml b/Documentation/devicetree/bindings/regulator/maxim,max20086.yaml index 05f7239118..7394c0a339 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max20086.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max20086.yaml @@ -43,6 +43,7 @@ properties: "^OUT[1-4]$": type: object $ref: regulator.yaml# + unevaluatedProperties: false additionalProperties: false diff --git a/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml b/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml index 78c0b63243..6d6bbfbd26 100644 --- a/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml +++ b/Documentation/devicetree/bindings/regulator/maxim,max77826.yaml @@ -30,10 +30,12 @@ properties: "^LDO([1-9]|1[0-5])$": type: object $ref: regulator.yaml# + unevaluatedProperties: false "^BUCK|BUCKBOOST$": type: object $ref: regulator.yaml# + unevaluatedProperties: false additionalProperties: false diff --git a/Documentation/devicetree/bindings/regulator/mediatek,mt6358-regulator.yaml b/Documentation/devicetree/bindings/regulator/mediatek,mt6358-regulator.yaml new file mode 100644 index 0000000000..c50402fcba --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/mediatek,mt6358-regulator.yaml @@ -0,0 +1,250 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/mediatek,mt6358-regulator.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek MT6358 Regulator + +maintainers: + - Zhiyong Tao + +description: + Regulator node of the PMIC. This node should under the PMIC's device node. + All voltage regulators provided by the PMIC are described as sub-nodes of + this node. 
+ +properties: + compatible: + oneOf: + - const: mediatek,mt6358-regulator + - items: + - const: mediatek,mt6366-regulator + - const: mediatek,mt6358-regulator + + vsys-ldo1-supply: + description: Supply for LDOs vfe28, vxo22, vcn28, vaux18, vaud28, vsim1, vusb, vbif28 + vsys-ldo2-supply: + description: Supply for LDOs vldo28 (MT6358 only), vio28, vmc, vmch, vsim2 + vsys-ldo3-supply: + description: Supply for LDOs vcn33, vcama[12] (MT6358 only), vemc, vibr + vsys-vcore-supply: + description: Supply for buck regulator vcore + vsys-vdram1-supply: + description: Supply for buck regulator vdram1 + vsys-vgpu-supply: + description: Supply for buck regulator vgpu + vsys-vmodem-supply: + description: Supply for buck regulator vmodem + vsys-vpa-supply: + description: Supply for buck regulator vpa + vsys-vproc11-supply: + description: Supply for buck regulator vproc11 + vsys-vproc12-supply: + description: Supply for buck regulator vproc12 + vsys-vs1-supply: + description: Supply for buck regulator vs1 + vsys-vs2-supply: + description: Supply for buck regulator vs2 + vs1-ldo1-supply: + description: + Supply for LDOs vrf18, vefuse, vcn18, vcamio (MT6358 only), vio18, vm18 (MT6366 only) + vs2-ldo1-supply: + description: Supply for LDOs vdram2, vmddr (MT6366 only) + vs2-ldo2-supply: + description: Supply for LDOs vrf12, va12 + vs2-ldo3-supply: + description: + Supply for LDOs vsram-core (MT6366 only), vsram-gpu, vsram-others, vsram-proc11, vsram-proc12 + vs2-ldo4-supply: + description: Supply for LDO vcamd + +patternProperties: + "^(buck_)?v(core|dram1|gpu|modem|pa|proc1[12]|s[12])$": + description: Buck regulators + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: + description: | + Buck regulatpr operating modes allowed. Valid values below. 
+ Users should use the macros from dt-bindings/regulator/mediatek,mt6397-regulator.h + 0 (MT6397_BUCK_MODE_AUTO): Auto PFM/PWM mode + 1 (MT6397_BUCK_MODE_FORCE_PWM): Forced PWM mode + items: + enum: [0, 1] + unevaluatedProperties: false + + "^(ldo_)?v(a|rf)12$": + description: LDOs with fixed 1.2V output and 0~100/10mV tuning + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?v((aux|cn|io|rf)18|camio)$": + description: + LDOs with fixed 1.8V output and 0~100/10mV tuning (vcn18 on MT6366 has variable output) + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?vxo22$": + description: LDOs with fixed 2.2V output and 0~100/10mV tuning + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?v(aud|bif|cn|fe|io)28$": + description: LDOs with fixed 2.8V output and 0~100/10mV tuning + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?vusb$": + description: LDOs with fixed 3.0V output and 0~100/10mV tuning + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?vsram[_-](core|gpu|others|proc1[12])$": + description: LDOs with variable output + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + + "^(ldo_)?v(cama[12]|camd|cn33|dram2|efuse|emc|ibr|ldo28|m18|mc|mch|mddr|sim[12])$": + description: LDOs with variable output and 0~100/10mV tuning + type: object + $ref: regulator.yaml# + properties: + regulator-allowed-modes: false + unevaluatedProperties: false + +required: + - compatible + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + const: mediatek,mt6358-regulator + then: + patternProperties: + # Old 
regulator node name scheme (with prefix and underscores) only + # ([^y-] is used to avoid matching -supply + "^(? + + regulator { + compatible = "mediatek,mt6358-regulator"; + + buck_vgpu { + regulator-name = "vgpu"; + regulator-min-microvolt = <625000>; + regulator-max-microvolt = <900000>; + regulator-ramp-delay = <6250>; + regulator-enable-ramp-delay = <200>; + regulator-allowed-modes = ; + }; + + ldo_vsram_gpu { + regulator-name = "vsram_gpu"; + regulator-min-microvolt = <850000>; + regulator-max-microvolt = <1000000>; + regulator-ramp-delay = <6250>; + regulator-enable-ramp-delay = <240>; + }; + }; + + - | + #include + + regulator { + compatible = "mediatek,mt6366-regulator", "mediatek,mt6358-regulator"; + + vdram1 { + regulator-name = "pp1125_emi_vdd2"; + regulator-min-microvolt = <1125000>; + regulator-max-microvolt = <1125000>; + regulator-ramp-delay = <12500>; + regulator-enable-ramp-delay = <0>; + regulator-allowed-modes = ; + regulator-always-on; + }; + + vproc11 { + regulator-name = "ppvar_dvdd_proc_bc_mt6366"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1200000>; + regulator-ramp-delay = <6250>; + regulator-enable-ramp-delay = <200>; + regulator-allowed-modes = ; + regulator-always-on; + }; + + vmddr { + regulator-name = "pm0750_emi_vmddr"; + regulator-min-microvolt = <700000>; + regulator-max-microvolt = <750000>; + regulator-enable-ramp-delay = <325>; + regulator-always-on; + }; + + vsram-proc11 { + regulator-name = "pp0900_dvdd_sram_bc"; + regulator-min-microvolt = <850000>; + regulator-max-microvolt = <1120000>; + regulator-ramp-delay = <6250>; + regulator-enable-ramp-delay = <240>; + regulator-always-on; + }; + }; +... 
diff --git a/Documentation/devicetree/bindings/regulator/mps,mpq2286.yaml b/Documentation/devicetree/bindings/regulator/mps,mpq2286.yaml new file mode 100644 index 0000000000..1296f9b308 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/mps,mpq2286.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/mps,mpq2286.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Monolithic Power System MPQ2286 PMIC + +maintainers: + - Saravanan Sekar + +properties: + compatible: + enum: + - mps,mpq2286 + + reg: + maxItems: 1 + + regulators: + type: object + + properties: + buck: + type: object + $ref: regulator.yaml# + + unevaluatedProperties: false + + additionalProperties: false + +required: + - compatible + - reg + - regulators + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + pmic@3 { + compatible = "mps,mpq2286"; + reg = <0x3>; + + regulators { + buck { + regulator-name = "buck"; + regulator-min-microvolt = <1600000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt deleted file mode 100644 index b6384306db..0000000000 --- a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt +++ /dev/null @@ -1,350 +0,0 @@ -MediaTek MT6358 Regulator - -All voltage regulators provided by the MT6358 PMIC are described as the -subnodes of the MT6358 regulators node. Each regulator is named according -to its regulator type, buck_ and ldo_. The definition for each -of these nodes is defined using the standard binding for regulators at -Documentation/devicetree/bindings/regulator/regulator.txt. 
- -The valid names for regulators are:: -BUCK: - buck_vdram1, buck_vcore, buck_vpa, buck_vproc11, buck_vproc12, buck_vgpu, - buck_vs2, buck_vmodem, buck_vs1 -LDO: - ldo_vdram2, ldo_vsim1, ldo_vibr, ldo_vrf12, ldo_vio18, ldo_vusb, ldo_vcamio, - ldo_vcamd, ldo_vcn18, ldo_vfe28, ldo_vsram_proc11, ldo_vcn28, ldo_vsram_others, - ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18, ldo_vmch, ldo_vbif28, - ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12, ldo_vrf18, - ldo_vcn33, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28, ldo_vsim2 - -Example: - - pmic { - compatible = "mediatek,mt6358"; - - mt6358regulator: mt6358regulator { - compatible = "mediatek,mt6358-regulator"; - - mt6358_vdram1_reg: buck_vdram1 { - regulator-compatible = "buck_vdram1"; - regulator-name = "vdram1"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <2087500>; - regulator-ramp-delay = <12500>; - regulator-enable-ramp-delay = <0>; - regulator-always-on; - }; - - mt6358_vcore_reg: buck_vcore { - regulator-name = "vcore"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <200>; - regulator-always-on; - }; - - mt6358_vpa_reg: buck_vpa { - regulator-name = "vpa"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <3650000>; - regulator-ramp-delay = <50000>; - regulator-enable-ramp-delay = <250>; - }; - - mt6358_vproc11_reg: buck_vproc11 { - regulator-name = "vproc11"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <200>; - regulator-always-on; - }; - - mt6358_vproc12_reg: buck_vproc12 { - regulator-name = "vproc12"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <200>; - regulator-always-on; - }; - - mt6358_vgpu_reg: buck_vgpu { - regulator-name = "vgpu"; - regulator-min-microvolt = 
<500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <200>; - }; - - mt6358_vs2_reg: buck_vs2 { - regulator-name = "vs2"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <2087500>; - regulator-ramp-delay = <12500>; - regulator-enable-ramp-delay = <0>; - regulator-always-on; - }; - - mt6358_vmodem_reg: buck_vmodem { - regulator-name = "vmodem"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <900>; - regulator-always-on; - }; - - mt6358_vs1_reg: buck_vs1 { - regulator-name = "vs1"; - regulator-min-microvolt = <1000000>; - regulator-max-microvolt = <2587500>; - regulator-ramp-delay = <12500>; - regulator-enable-ramp-delay = <0>; - regulator-always-on; - }; - - mt6358_vdram2_reg: ldo_vdram2 { - regulator-name = "vdram2"; - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <3300>; - }; - - mt6358_vsim1_reg: ldo_vsim1 { - regulator-name = "vsim1"; - regulator-min-microvolt = <1700000>; - regulator-max-microvolt = <3100000>; - regulator-enable-ramp-delay = <540>; - }; - - mt6358_vibr_reg: ldo_vibr { - regulator-name = "vibr"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3300000>; - regulator-enable-ramp-delay = <60>; - }; - - mt6358_vrf12_reg: ldo_vrf12 { - compatible = "regulator-fixed"; - regulator-name = "vrf12"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-enable-ramp-delay = <120>; - }; - - mt6358_vio18_reg: ldo_vio18 { - compatible = "regulator-fixed"; - regulator-name = "vio18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <2700>; - regulator-always-on; - }; - - mt6358_vusb_reg: ldo_vusb { - regulator-name = "vusb"; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3100000>; - 
regulator-enable-ramp-delay = <270>; - regulator-always-on; - }; - - mt6358_vcamio_reg: ldo_vcamio { - compatible = "regulator-fixed"; - regulator-name = "vcamio"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vcamd_reg: ldo_vcamd { - regulator-name = "vcamd"; - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vcn18_reg: ldo_vcn18 { - compatible = "regulator-fixed"; - regulator-name = "vcn18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vfe28_reg: ldo_vfe28 { - compatible = "regulator-fixed"; - regulator-name = "vfe28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vsram_proc11_reg: ldo_vsram_proc11 { - regulator-name = "vsram_proc11"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <240>; - regulator-always-on; - }; - - mt6358_vcn28_reg: ldo_vcn28 { - compatible = "regulator-fixed"; - regulator-name = "vcn28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vsram_others_reg: ldo_vsram_others { - regulator-name = "vsram_others"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <240>; - regulator-always-on; - }; - - mt6358_vsram_gpu_reg: ldo_vsram_gpu { - regulator-name = "vsram_gpu"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <240>; - }; - - mt6358_vxo22_reg: ldo_vxo22 { - compatible = "regulator-fixed"; - regulator-name = "vxo22"; - regulator-min-microvolt = 
<2200000>; - regulator-max-microvolt = <2200000>; - regulator-enable-ramp-delay = <120>; - regulator-always-on; - }; - - mt6358_vefuse_reg: ldo_vefuse { - regulator-name = "vefuse"; - regulator-min-microvolt = <1700000>; - regulator-max-microvolt = <1900000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vaux18_reg: ldo_vaux18 { - compatible = "regulator-fixed"; - regulator-name = "vaux18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vmch_reg: ldo_vmch { - regulator-name = "vmch"; - regulator-min-microvolt = <2900000>; - regulator-max-microvolt = <3300000>; - regulator-enable-ramp-delay = <60>; - }; - - mt6358_vbif28_reg: ldo_vbif28 { - compatible = "regulator-fixed"; - regulator-name = "vbif28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vsram_proc12_reg: ldo_vsram_proc12 { - regulator-name = "vsram_proc12"; - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <1293750>; - regulator-ramp-delay = <6250>; - regulator-enable-ramp-delay = <240>; - regulator-always-on; - }; - - mt6358_vcama1_reg: ldo_vcama1 { - regulator-name = "vcama1"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3000000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vemc_reg: ldo_vemc { - regulator-name = "vemc"; - regulator-min-microvolt = <2900000>; - regulator-max-microvolt = <3300000>; - regulator-enable-ramp-delay = <60>; - regulator-always-on; - }; - - mt6358_vio28_reg: ldo_vio28 { - compatible = "regulator-fixed"; - regulator-name = "vio28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_va12_reg: ldo_va12 { - compatible = "regulator-fixed"; - regulator-name = "va12"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-enable-ramp-delay = 
<270>; - regulator-always-on; - }; - - mt6358_vrf18_reg: ldo_vrf18 { - compatible = "regulator-fixed"; - regulator-name = "vrf18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-enable-ramp-delay = <120>; - }; - - mt6358_vcn33_reg: ldo_vcn33 { - regulator-name = "vcn33"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3500000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vcama2_reg: ldo_vcama2 { - regulator-name = "vcama2"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3000000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vmc_reg: ldo_vmc { - regulator-name = "vmc"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3300000>; - regulator-enable-ramp-delay = <60>; - }; - - mt6358_vldo28_reg: ldo_vldo28 { - regulator-name = "vldo28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <3000000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vaud28_reg: ldo_vaud28 { - compatible = "regulator-fixed"; - regulator-name = "vaud28"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-enable-ramp-delay = <270>; - }; - - mt6358_vsim2_reg: ldo_vsim2 { - regulator-name = "vsim2"; - regulator-min-microvolt = <1700000>; - regulator-max-microvolt = <3100000>; - regulator-enable-ramp-delay = <540>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/regulator/onnn,fan53880.yaml b/Documentation/devicetree/bindings/regulator/onnn,fan53880.yaml index eb61e04ef8..b5181719da 100644 --- a/Documentation/devicetree/bindings/regulator/onnn,fan53880.yaml +++ b/Documentation/devicetree/bindings/regulator/onnn,fan53880.yaml @@ -48,10 +48,12 @@ properties: "^LDO[1-4]$": type: object $ref: regulator.yaml# + unevaluatedProperties: false "^BUCK|BOOST$": type: object $ref: regulator.yaml# + unevaluatedProperties: false additionalProperties: false diff --git 
a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml index 127a6f39b7..acd37f28ef 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml @@ -50,6 +50,7 @@ description: | For PM8550, smps1 - smps6, ldo1 - ldo17, bob1 - bob2 For PM8998, smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2 For PMI8998, bob + For PMC8380, smps1 - smps8, ldo1 - ldo3 For PMR735A, smps1 - smps3, ldo1 - ldo7 For PMX55, smps1 - smps7, ldo1 - ldo16 For PMX65, smps1 - smps8, ldo1 - ldo21 @@ -78,6 +79,7 @@ properties: - qcom,pm8998-rpmh-regulators - qcom,pmc8180-rpmh-regulators - qcom,pmc8180c-rpmh-regulators + - qcom,pmc8380-rpmh-regulators - qcom,pmg1110-rpmh-regulators - qcom,pmi8998-rpmh-regulators - qcom,pmm8155au-rpmh-regulators @@ -364,6 +366,16 @@ allOf: patternProperties: "^vdd-s([1-9]|1[0-3])-supply$": true + - if: + properties: + compatible: + enum: + - qcom,pmc8380-rpmh-regulators + then: + patternProperties: + "^vdd-l[1-3]-supply$": true + "^vdd-s[1-8]-supply$": true + - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.yaml index bdf34c2de9..7a1b7d2abb 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.yaml @@ -17,12 +17,15 @@ properties: - qcom,pm660l-regulators - qcom,pm8004-regulators - qcom,pm8005-regulators + - qcom,pm8019-regulators - qcom,pm8226-regulators - qcom,pm8841-regulators + - qcom,pm8909-regulators - qcom,pm8916-regulators - qcom,pm8941-regulators - qcom,pm8950-regulators - qcom,pm8994-regulators + - qcom,pma8084-regulators - qcom,pmi8994-regulators - qcom,pmp8074-regulators - qcom,pms405-regulators @@ -32,7 +35,7 @@ properties: $ref:
/schemas/types.yaml#/definitions/phandle patternProperties: - "^(5vs[1-2]|(l|s)[1-9][0-9]?|lvs[1-3])$": + "^(5vs[1-2]|(l|s)[1-9][0-9]?|lvs[1-4])$": description: List of regulators and its properties type: object $ref: regulator.yaml# @@ -176,6 +179,25 @@ allOf: patternProperties: "^vdd_s[1-4]-supply$": true + - if: + properties: + compatible: + contains: + enum: + - qcom,pm8019-regulators + then: + properties: + vdd_l1-supply: true + vdd_l2_l3-supply: true + vdd_l4_l5_l6-supply: true + vdd_l7_l8_l11-supply: true + vdd_l9-supply: true + vdd_l10-supply: true + vdd_l12-supply: true + vdd_l13_l14-supply: true + patternProperties: + "^vdd_s[1-4]-supply$": true + - if: properties: compatible: @@ -206,6 +228,24 @@ allOf: patternProperties: "^vdd_s[1-8]-supply$": true + - if: + properties: + compatible: + contains: + enum: + - qcom,pm8909-regulators + then: + properties: + vdd_s1-supply: true + vdd_s2-supply: true + vdd_l1-supply: true + vdd_l2_l5-supply: true + vdd_l3_l6_l10-supply: true + vdd_l4_l7-supply: true + vdd_l8_l11_l15_l18-supply: true + vdd_l9_l12_l14_l17-supply: true + vdd_l13-supply: true + - if: properties: compatible: @@ -295,6 +335,32 @@ allOf: patternProperties: "^vdd_s[1-9][0-2]?-supply$": true + - if: + properties: + compatible: + contains: + enum: + - qcom,pma8084-regulators + then: + properties: + vdd_l1_l11-supply: true + vdd_l2_l3_l4_l27-supply: true + vdd_l5_l7-supply: true + vdd_l6_l12_l14_l15_l26-supply: true + vdd_l8-supply: true + vdd_l9_l10_l13_l20_l23_l24-supply: true + vdd_l16_l25-supply: true + vdd_l17-supply: true + vdd_l18-supply: true + vdd_l19-supply: true + vdd_l21-supply: true + vdd_l22-supply: true + vdd_lvs1_2-supply: true + vdd_lvs3_4-supply: true + vdd_5vs1-supply: true + patternProperties: + "^vdd_s([1-9]|1[0-2])-supply$": true + - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml index 895415772d..09102dda49 100644 --- 
a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml @@ -21,6 +21,7 @@ properties: - mediatek,mt8188-scp - mediatek,mt8192-scp - mediatek,mt8195-scp + - mediatek,mt8195-scp-dual reg: description: @@ -31,10 +32,7 @@ properties: reg-names: minItems: 2 - items: - - const: sram - - const: cfg - - const: l1tcm + maxItems: 3 clocks: description: @@ -58,6 +56,93 @@ properties: memory-region: maxItems: 1 + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + + ranges: + description: + Standard ranges definition providing address translations for + local SCP SRAM address spaces to bus addresses. + +patternProperties: + "^scp@[a-f0-9]+$": + type: object + description: + The MediaTek SCP integrated to SoC might be a multi-core version. + The other cores are represented as child nodes of the boot core. + There are some integration differences for the IP like the usage of + address translator for translating SoC bus addresses into address space + for the processor. + + Each SCP core has own cache memory. The SRAM and L1TCM are shared by + cores. The power of cache, SRAM and L1TCM power should be enabled + before booting SCP cores. The size of cache, SRAM, and L1TCM are varied + on different SoCs. + + The SCP cores do not use an MMU, but has a set of registers to + control the translations between 32-bit CPU addresses into system bus + addresses. Cache and memory access settings are provided through a + Memory Protection Unit (MPU), programmable only from the SCP. + + properties: + compatible: + enum: + - mediatek,scp-core + + reg: + description: The base address and size of SRAM.
+ maxItems: 1 + + reg-names: + const: sram + + interrupts: + maxItems: 1 + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: + If present, name (or relative path) of the file within the + firmware search path containing the firmware image used when + initializing sub cores of multi-core SCP. + + memory-region: + maxItems: 1 + + cros-ec-rpmsg: + $ref: /schemas/mfd/google,cros-ec.yaml + description: + This subnode represents the rpmsg device. The properties + of this node are defined by the individual bindings for + the rpmsg devices. + + required: + - mediatek,rpmsg-name + + unevaluatedProperties: false + + required: + - compatible + - reg + - reg-names + + additionalProperties: false + required: - compatible - reg @@ -87,23 +172,39 @@ allOf: reg: maxItems: 2 reg-names: + items: + - const: sram + - const: cfg + - if: + properties: + compatible: + enum: + - mediatek,mt8192-scp + - mediatek,mt8195-scp + then: + properties: + reg: + maxItems: 3 + reg-names: + items: + - const: sram + - const: cfg + - const: l1tcm + - if: + properties: + compatible: + enum: + - mediatek,mt8195-scp-dual + then: + properties: + reg: maxItems: 2 + reg-names: + items: + - const: cfg + - const: l1tcm -additionalProperties: - type: object - description: - Subnodes of the SCP represent rpmsg devices. The names of the devices - are not important. The properties of these nodes are defined by the - individual bindings for the rpmsg devices. - properties: - mediatek,rpmsg-name: - $ref: /schemas/types.yaml#/definitions/string-array - description: - Contains the name for the rpmsg device. Used to match - the subnode to rpmsg device announced by SCP. 
- - required: - - mediatek,rpmsg-name +additionalProperties: false examples: - | @@ -118,7 +219,42 @@ examples: clocks = <&infracfg CLK_INFRA_SCPSYS>; clock-names = "main"; - cros_ec { + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; mediatek,rpmsg-name = "cros-ec-rpmsg"; }; }; + + - | + scp@10500000 { + compatible = "mediatek,mt8195-scp-dual"; + reg = <0x10720000 0xe0000>, + <0x10700000 0x8000>; + reg-names = "cfg", "l1tcm"; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x10500000 0x100000>; + + scp@0 { + compatible = "mediatek,scp-core"; + reg = <0x0 0xa0000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + + scp@a0000 { + compatible = "mediatek,scp-core"; + reg = <0xa0000 0x20000>; + reg-names = "sram"; + + cros-ec-rpmsg { + compatible = "google,cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml index a2b0079de0..661c2b425d 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml @@ -66,7 +66,9 @@ allOf: - qcom,msm8953-adsp-pil - qcom,msm8974-adsp-pil - qcom,msm8996-adsp-pil + - qcom,msm8996-slpi-pil - qcom,msm8998-adsp-pas + - qcom,msm8998-slpi-pas - qcom,sdm845-adsp-pas - qcom,sdm845-cdsp-pas - qcom,sdm845-slpi-pas @@ -79,24 +81,6 @@ allOf: items: - const: xo - - if: - properties: - compatible: - contains: - enum: - - qcom,msm8996-slpi-pil - - qcom,msm8998-slpi-pas - then: - properties: - clocks: - items: - - description: XO clock - - description: AGGRE2 clock - clock-names: - items: - - const: xo - - const: aggre2 - - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml index 
0643faae2c..971734085d 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml @@ -220,7 +220,6 @@ allOf: - description: GCC MSS GPLL0 clock - description: GCC MSS SNOC_AXI clock - description: GCC MSS MNOC_AXI clock - - description: RPM PNOC clock - description: RPM QDSS clock clock-names: items: @@ -231,7 +230,6 @@ allOf: - const: gpll0_mss - const: snoc_axi - const: mnoc_axi - - const: pnoc - const: qdss glink-edge: false required: diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml index 689d5d5353..f10f329677 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml @@ -16,6 +16,7 @@ description: properties: compatible: enum: + - qcom,sc7180-adsp-pas - qcom,sc7180-mpss-pas - qcom,sc7280-mpss-pas @@ -30,26 +31,6 @@ properties: items: - const: xo - interrupts: - minItems: 6 - - interrupt-names: - minItems: 6 - - power-domains: - minItems: 2 - items: - - description: CX power domain - - description: MX power domain - - description: MSS power domain - - power-domain-names: - minItems: 2 - items: - - const: cx - - const: mx - - const: mss - memory-region: maxItems: 1 description: Reference to the reserved-memory for the Hexagon core @@ -71,6 +52,40 @@ required: allOf: - $ref: /schemas/remoteproc/qcom,pas-common.yaml# + - if: + properties: + compatible: + enum: + - qcom,sc7180-adsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sc7180-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + 
- const: lmx + - if: properties: compatible: @@ -79,15 +94,31 @@ allOf: then: properties: power-domains: - minItems: 3 + items: + - description: CX power domain + - description: MX power domain + - description: MSS power domain power-domain-names: - minItems: 3 - else: + items: + - const: cx + - const: mx + - const: mss + + - if: + properties: + compatible: + enum: + - qcom,sc7280-mpss-pas + then: properties: power-domains: - maxItems: 2 + items: + - description: CX power domain + - description: MX power domain power-domain-names: - maxItems: 2 + items: + - const: cx + - const: mx unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml new file mode 100644 index 0000000000..3e4a03eb45 --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/remoteproc/qcom,sm6375-pas.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SM6375 Peripheral Authentication Service + +maintainers: + - Manivannan Sadhasivam + +description: + Qualcomm SM6375 SoC Peripheral Authentication Service loads and boots + firmware on the Qualcomm DSP Hexagon cores. 
+ +properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + + reg: + maxItems: 1 + + clocks: + items: + - description: XO clock + + clock-names: + items: + - const: xo + + memory-region: + maxItems: 1 + description: Reference to the reserved-memory for the Hexagon core + + firmware-name: + $ref: /schemas/types.yaml#/definitions/string + description: Firmware name for the Hexagon core + + smd-edge: false + +required: + - compatible + - reg + +allOf: + - $ref: /schemas/remoteproc/qcom,pas-common.yaml# + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + - qcom,sm6375-cdsp-pas + then: + properties: + interrupts: + maxItems: 5 + interrupt-names: + maxItems: 5 + else: + properties: + interrupts: + minItems: 6 + interrupt-names: + minItems: 6 + + - if: + properties: + compatible: + enum: + - qcom,sm6375-adsp-pas + then: + properties: + power-domains: + items: + - description: LCX power domain + - description: LMX power domain + power-domain-names: + items: + - const: lcx + - const: lmx + + - if: + properties: + compatible: + enum: + - qcom,sm6375-cdsp-pas + - qcom,sm6375-mpss-pas + then: + properties: + power-domains: + items: + - description: CX power domain + power-domain-names: + items: + - const: cx + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + #include + + remoteproc_adsp: remoteproc@a400000 { + compatible = "qcom,sm6375-adsp-pas"; + reg = <0x0a400000 0x100>; + + interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, + <&smp2p_adsp_in 3 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "wdog", "fatal", "ready", + "handover", "stop-ack"; + + clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>; + clock-names = "xo"; + + power-domains = <&rpmpd SM6375_VDD_LPI_CX>, + <&rpmpd SM6375_VDD_LPI_MX>; + power-domain-names = "lcx", "lmx"; + + 
memory-region = <&pil_adsp_mem>; + + qcom,smem-states = <&smp2p_adsp_out 0>; + qcom,smem-state-names = "stop"; + + glink-edge { + interrupts-extended = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP + IRQ_TYPE_EDGE_RISING>; + mboxes = <&ipcc IPCC_CLIENT_LPASS + IPCC_MPROC_SIGNAL_GLINK_QMP>; + + label = "lpass"; + qcom,remote-pid = <2>; + + /* ... */ + }; + }; diff --git a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml index 4bea679a0f..5c280117dc 100644 --- a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml @@ -31,7 +31,7 @@ properties: remoteproc device. This is variable and describes the memories shared with the remote processor (e.g. remoteproc firmware and carveouts, rpmsg vrings, ...). - (see ../reserved-memory/reserved-memory.yaml) + (see reserved-memory/reserved-memory.yaml in dtschema project) required: - compatible diff --git a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml index baccd98754..faf16cf140 100644 --- a/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/ti,pru-rproc.yaml @@ -66,6 +66,17 @@ properties: Should contain the name of the default firmware image file located on the firmware search path. + interrupts: + maxItems: 1 + description: + Interrupt specifiers enable the virtio/rpmsg communication between MPU + and the PRU/RTU cores. For the values of the interrupt cells please refer + to interrupt-controller/ti,pruss-intc.yaml schema. 
+ + interrupt-names: + items: + - const: vring + if: properties: compatible: @@ -171,6 +182,9 @@ examples: <0x22400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <16 2 2>; + interrupt-names = "vring"; }; rtu0_0: rtu@4000 { @@ -180,6 +194,9 @@ examples: <0x23400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_0-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <20 4 4>; + interrupt-names = "vring"; }; tx_pru0_0: txpru@a000 { @@ -198,6 +215,9 @@ examples: <0x24400 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-pru0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <18 3 3>; + interrupt-names = "vring"; }; rtu0_1: rtu@6000 { @@ -207,6 +227,9 @@ examples: <0x23c00 0x100>; reg-names = "iram", "control", "debug"; firmware-name = "am65x-rtu0_1-fw"; + interrupt-parent = <&icssg0_intc>; + interrupts = <22 5 5>; + interrupt-names = "vring"; }; tx_pru0_1: txpru@c000 { diff --git a/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml b/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml deleted file mode 100644 index 851ec24d61..0000000000 --- a/Documentation/devicetree/bindings/reserved-memory/framebuffer.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/reserved-memory/framebuffer.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: /reserved-memory framebuffer node - -maintainers: - - devicetree-spec@vger.kernel.org - -allOf: - - $ref: reserved-memory.yaml - -properties: - compatible: - const: framebuffer - description: > - This indicates a region of memory meant to be used as a framebuffer for - a set of display devices. 
It can be used by an operating system to keep - the framebuffer from being overwritten and use it as the backing memory - for a display device (such as simple-framebuffer). - -unevaluatedProperties: false - -examples: - - | - / { - compatible = "foo"; - model = "foo"; - #address-cells = <1>; - #size-cells = <1>; - - chosen { - framebuffer { - compatible = "simple-framebuffer"; - memory-region = <&fb>; - }; - }; - - reserved-memory { - #address-cells = <1>; - #size-cells = <1>; - ranges; - - fb: framebuffer@80000000 { - compatible = "framebuffer"; - reg = <0x80000000 0x007e9000>; - }; - }; - }; -... diff --git a/Documentation/devicetree/bindings/reserved-memory/memory-region.yaml b/Documentation/devicetree/bindings/reserved-memory/memory-region.yaml deleted file mode 100644 index 592f180e6b..0000000000 --- a/Documentation/devicetree/bindings/reserved-memory/memory-region.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/reserved-memory/memory-region.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Reserved Memory Region - -maintainers: - - devicetree-spec@vger.kernel.org - -description: | - Regions in the /reserved-memory node may be referenced by other device - nodes by adding a memory-region property to the device node. - -select: true - -properties: - memory-region: - $ref: /schemas/types.yaml#/definitions/phandle-array - description: > - Phandle to a /reserved-memory child node assigned to the device. - - memory-region-names: - $ref: /schemas/types.yaml#/definitions/string-array - description: > - A list of names, one for each corresponding entry in the - memory-region property - -additionalProperties: true - -examples: - - | - fb0: video@12300000 { - /* ... */ - reg = <0x12300000 0x1000>; - memory-region = <&display_reserved>; - }; - -... 
diff --git a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.yaml b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.yaml index bab982f004..46407e9c1d 100644 --- a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.yaml +++ b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.yaml @@ -26,6 +26,17 @@ properties: description: > identifier of the client to use this region for buffers + qcom,use-guard-pages: + type: boolean + description: > + Indicates that the firmware, or hardware, does not gracefully handle + memory protection of this region when placed adjacent to other protected + memory regions, and that padding around the used portion of the memory + region is necessary. + + When this is set, the first and last page should be left unused, and the + effective size of the region will thereby shrink with two pages. + qcom,vmid: $ref: /schemas/types.yaml#/definitions/uint32-array description: > diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt index 1810701a85..8ce72996d5 100644 --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt @@ -1 +1 @@ -This file has been moved to reserved-memory.yaml. +This file has been moved to reserved-memory.yaml in the dtschema repository. 
diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml deleted file mode 100644 index c680e397cf..0000000000 --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml +++ /dev/null @@ -1,181 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/reserved-memory/reserved-memory.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: /reserved-memory Child Node Common - -maintainers: - - devicetree-spec@vger.kernel.org - -description: > - Reserved memory is specified as a node under the /reserved-memory node. The - operating system shall exclude reserved memory from normal usage one can - create child nodes describing particular reserved (excluded from normal use) - memory regions. Such memory regions are usually designed for the special - usage by various device drivers. - - Each child of the reserved-memory node specifies one or more regions - of reserved memory. Each child node may either use a 'reg' property to - specify a specific range of reserved memory, or a 'size' property with - optional constraints to request a dynamically allocated block of - memory. - - Following the generic-names recommended practice, node names should - reflect the purpose of the node (ie. "framebuffer" or "dma-pool"). - Unit address (@<address>) should be appended to the name if the node - is a static allocation. - -properties: - reg: true - - size: - oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - - $ref: /schemas/types.yaml#/definitions/uint64 - description: > - Length based on parent's \#size-cells. Size in bytes of memory to - reserve. - - alignment: - oneOf: - - $ref: /schemas/types.yaml#/definitions/uint32 - - $ref: /schemas/types.yaml#/definitions/uint64 - description: > - Length based on parent's \#size-cells. Address boundary for - alignment of allocation. - - alloc-ranges: - $ref: /schemas/types.yaml#/definitions/uint32-array - description: > - Address and Length pairs. Specifies regions of memory that are - acceptable to allocate from. - - iommu-addresses: - $ref: /schemas/types.yaml#/definitions/phandle-array - description: > - A list of phandle and specifier pairs that describe static IO virtual - address space mappings and carveouts associated with a given reserved - memory region. The phandle in the first cell refers to the device for - which the mapping or carveout is to be created. - - The specifier consists of an address/size pair and denotes the IO - virtual address range of the region for the given device. The exact - format depends on the values of the "#address-cells" and "#size-cells" - properties of the device referenced via the phandle. - - When used in combination with a "reg" property, an IOVA mapping is to - be established for this memory region. One example where this can be - useful is to create an identity mapping for physical memory that the - firmware has configured some hardware to access (such as a bootsplash - framebuffer). - - If no "reg" property is specified, the "iommu-addresses" property - defines carveout regions in the IOVA space for the given device. This - can be useful if a certain memory region should not be mapped through - the IOMMU.
- - no-map: - type: boolean - description: > - Indicates the operating system must not create a virtual mapping - of the region as part of its standard mapping of system memory, - nor permit speculative access to it under any circumstances other - than under the control of the device driver using the region. - - reusable: - type: boolean - description: > - The operating system can use the memory in this region with the - limitation that the device driver(s) owning the region need to be - able to reclaim it back. Typically that means that the operating - system can use that region to store volatile or cached data that - can be otherwise regenerated or migrated elsewhere. - -allOf: - - if: - required: - - no-map - - then: - not: - required: - - reusable - - - if: - required: - - reusable - - then: - not: - required: - - no-map - -oneOf: - - oneOf: - - required: - - reg - - - required: - - size - - - oneOf: - # IOMMU reservations - - required: - - iommu-addresses - - # IOMMU mappings - - required: - - reg - - iommu-addresses - -additionalProperties: true - -examples: - - | - / { - compatible = "foo"; - model = "foo"; - - #address-cells = <2>; - #size-cells = <2>; - - reserved-memory { - #address-cells = <2>; - #size-cells = <2>; - ranges; - - adsp_resv: reservation-adsp { - /* - * Restrict IOVA mappings for ADSP buffers to the 512 MiB region - * from 0x40000000 - 0x5fffffff. Anything outside is reserved by - * the ADSP for I/O memory and private memory allocations. 
- */ - iommu-addresses = <&adsp 0x0 0x00000000 0x00 0x40000000>, - <&adsp 0x0 0x60000000 0xff 0xa0000000>; - }; - - fb: framebuffer@90000000 { - reg = <0x0 0x90000000 0x0 0x00800000>; - iommu-addresses = <&dc0 0x0 0x90000000 0x0 0x00800000>; - }; - }; - - bus@0 { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x0 0x0 0x40000000>; - - adsp: adsp@2990000 { - reg = <0x2990000 0x2000>; - memory-region = <&adsp_resv>; - }; - - dc0: display@15200000 { - reg = <0x15200000 0x10000>; - memory-region = <&fb>; - }; - }; - }; -... diff --git a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml b/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml deleted file mode 100644 index 457de0920c..0000000000 --- a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/reserved-memory/shared-dma-pool.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: /reserved-memory DMA pool - -maintainers: - - devicetree-spec@vger.kernel.org - -allOf: - - $ref: reserved-memory.yaml - -properties: - compatible: - oneOf: - - const: shared-dma-pool - description: > - This indicates a region of memory meant to be used as a shared - pool of DMA buffers for a set of devices. It can be used by an - operating system to instantiate the necessary pool management - subsystem if necessary. - - - const: restricted-dma-pool - description: > - This indicates a region of memory meant to be used as a pool - of restricted DMA buffers for a set of devices. The memory - region would be the only region accessible to those devices. - When using this, the no-map and reusable properties must not - be set, so the operating system can create a virtual mapping - that will be used for synchronization. 
The main purpose for - restricted DMA is to mitigate the lack of DMA access control - on systems without an IOMMU, which could result in the DMA - accessing the system memory at unexpected times and/or - unexpected addresses, possibly leading to data leakage or - corruption. The feature on its own provides a basic level of - protection against the DMA overwriting buffer contents at - unexpected times. However, to protect against general data - leakage and system memory corruption, the system needs to - provide way to lock down the memory access, e.g., MPU. Note - that since coherent allocation needs remapping, one must set - up another device coherent pool by shared-dma-pool and use - dma_alloc_from_dev_coherent instead for atomic coherent - allocation. - - linux,cma-default: - type: boolean - description: > - If this property is present, then Linux will use the region for - the default pool of the contiguous memory allocator. - - linux,dma-default: - type: boolean - description: > - If this property is present, then Linux will use the region for - the default pool of the consistent DMA allocator. - -if: - properties: - compatible: - contains: - const: restricted-dma-pool -then: - properties: - no-map: false - reusable: false - -unevaluatedProperties: false - -examples: - - | - reserved-memory { - #address-cells = <1>; - #size-cells = <1>; - ranges; - - /* global autoconfigured region for contiguous allocations */ - linux,cma { - compatible = "shared-dma-pool"; - reusable; - size = <0x4000000>; - alignment = <0x2000>; - linux,cma-default; - }; - - display_reserved: framebuffer@78000000 { - reg = <0x78000000 0x800000>; - }; - - restricted_dma_reserved: restricted-dma-pool@50000000 { - compatible = "restricted-dma-pool"; - reg = <0x50000000 0x4000000>; - }; - }; - -... 
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 97e8441eda..f392e367d6 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -47,6 +47,7 @@ properties: - sifive,u74-mc - thead,c906 - thead,c910 + - thead,c920 - const: riscv - items: - enum: diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index cc1f546fdb..c91ab0e466 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -128,6 +128,12 @@ properties: changes to interrupts as frozen at commit ccbddab ("Merge pull request #42 from riscv/jhauser-2023-RC4") of riscv-aia. + - const: smstateen + description: | + The standard Smstateen extension for controlling access to CSRs + added by other RISC-V extensions in H/S/VS/U/VU modes and as + ratified at commit a28bfae (Ratified (#7)) of riscv-state-enable. + - const: ssaia description: | The standard Ssaia supervisor-level extension for the advanced @@ -212,6 +218,12 @@ properties: ratified in the 20191213 version of the unprivileged ISA specification. + - const: zicond + description: + The standard Zicond extension for conditional arithmetic and + conditional-select/move operations as ratified in commit 95cf1f9 + ("Add changes requested by Ved during signoff") of riscv-zicond. 
+ - const: zicsr description: | The standard Zicsr extension for control and status register diff --git a/Documentation/devicetree/bindings/riscv/sophgo.yaml b/Documentation/devicetree/bindings/riscv/sophgo.yaml new file mode 100644 index 0000000000..86748c5390 --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/sophgo.yaml @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/sophgo.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo SoC-based boards + +maintainers: + - Chao Wei + - Chen Wang + +description: + Sophgo SoC-based boards + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - items: + - enum: + - milkv,duo + - const: sophgo,cv1800b + - items: + - enum: + - milkv,pioneer + - const: sophgo,sg2042 + +additionalProperties: true + +... diff --git a/Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml b/Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml index 457a6e43d8..afa52af442 100644 --- a/Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml +++ b/Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml @@ -14,6 +14,7 @@ properties: compatible: enum: - amlogic,meson-rng + - amlogic,meson-s4-rng reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml index 187b172d0c..717f6b321f 100644 --- a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml +++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml @@ -15,7 +15,9 @@ maintainers: properties: compatible: - const: st,stm32-rng + enum: + - st,stm32-rng + - st,stm32mp13-rng reg: maxItems: 1 @@ -30,11 +32,27 @@ properties: type: boolean description: If set enable the clock detection management + st,rng-lock-conf: + type: boolean + description: If set, the RNG configuration in RNG_CR, RNG_HTCR and + RNG_NSCR will be locked. 
+ required: - compatible - reg - clocks +allOf: + - if: + properties: + compatible: + contains: + enum: + - st,stm32-rng + then: + properties: + st,rng-lock-conf: false + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml index 8bf7d3a9be..3a4551253e 100644 --- a/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml @@ -13,6 +13,7 @@ maintainers: allOf: - $ref: rtc.yaml# - $ref: /schemas/nvmem/nvmem.yaml# + - $ref: /schemas/nvmem/nvmem-deprecated-cells.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml b/Documentation/devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml new file mode 100644 index 0000000000..a95f6af2b1 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/cirrus,ep9301-rtc.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/cirrus,ep9301-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus EP93xx Real Time Clock controller + +maintainers: + - Hartley Sweeten + - Alexander Sverdlin + +allOf: + - $ref: rtc.yaml# + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-rtc + - items: + - enum: + - cirrus,ep9302-rtc + - cirrus,ep9307-rtc + - cirrus,ep9312-rtc + - cirrus,ep9315-rtc + - const: cirrus,ep9301-rtc + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + rtc@80920000 { + compatible = "cirrus,ep9301-rtc"; + reg = <0x80920000 0x100>; + }; diff --git a/Documentation/devicetree/bindings/rtc/epson,rtc7301.txt b/Documentation/devicetree/bindings/rtc/epson,rtc7301.txt deleted file mode 100644 index 5f9df3f146..0000000000 --- a/Documentation/devicetree/bindings/rtc/epson,rtc7301.txt +++ /dev/null @@ -1,16 +0,0 @@ -EPSON TOYOCOM 
RTC-7301SF/DG - -Required properties: - -- compatible: Should be "epson,rtc7301sf" or "epson,rtc7301dg" -- reg: Specifies base physical address and size of the registers. -- interrupts: A single interrupt specifier. - -Example: - -rtc: rtc@44a00000 { - compatible = "epson,rtc7301dg"; - reg = <0x44a00000 0x10000>; - interrupt-parent = <&axi_intc_0>; - interrupts = <3 2>; -}; diff --git a/Documentation/devicetree/bindings/rtc/epson,rtc7301.yaml b/Documentation/devicetree/bindings/rtc/epson,rtc7301.yaml new file mode 100644 index 0000000000..bdb5cadb31 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/epson,rtc7301.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/epson,rtc7301.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Epson Toyocom RTC-7301SF/DG + +description: + The only difference between the two variants is the packaging. + The DG variant is a DIL package, and the SF variant is a flat + package. + +maintainers: + - Akinobu Mita + +properties: + compatible: + enum: + - epson,rtc7301dg + - epson,rtc7301sf + + reg: + maxItems: 1 + + reg-io-width: + description: + The size (in bytes) of the IO accesses that should be performed + on the device. + enum: [1, 4] + default: 4 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + rtc: rtc@44a00000 { + compatible = "epson,rtc7301dg"; + reg = <0x44a00000 0x10000>; + reg-io-width = <4>; + interrupt-parent = <&axi_intc_0>; + interrupts = <3 2>; + }; diff --git a/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt b/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt deleted file mode 100644 index a59fdd8c23..0000000000 --- a/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt +++ /dev/null @@ -1,11 +0,0 @@ -* Maxim MCP795 SPI Serial Real-Time Clock - -Required properties: -- compatible: Should contain "maxim,mcp795". 
-- reg: SPI address for chip - -Example: - mcp795: rtc@0 { - compatible = "maxim,mcp795"; - reg = <0>; - }; diff --git a/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml index 27a9de10f0..7680089d1d 100644 --- a/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml +++ b/Documentation/devicetree/bindings/rtc/microcrystal,rv3032.yaml @@ -38,6 +38,8 @@ properties: - 3000 - 4400 + wakeup-source: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml b/Documentation/devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml new file mode 100644 index 0000000000..4c1f22ef5a --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/mstar,ssd202d-rtc.yaml @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/mstar,ssd202d-rtc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mstar SSD202D Real Time Clock + +maintainers: + - Daniel Palmer + - Romain Perier + +allOf: + - $ref: rtc.yaml# + +properties: + compatible: + enum: + - mstar,ssd202d-rtc + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + rtc@6800 { + compatible = "mstar,ssd202d-rtc"; + reg = <0x6800 0x200>; + }; +... 
diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf2123.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf2123.yaml new file mode 100644 index 0000000000..96e377a4d1 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf2123.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/nxp,pcf2123.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP PCF2123 SPI Real Time Clock + +maintainers: + - Javier Carrasco + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: rtc.yaml# + +properties: + compatible: + enum: + - nxp,pcf2123 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + spi { + #address-cells = <1>; + #size-cells = <0>; + + rtc@3 { + compatible = "nxp,pcf2123"; + reg = <3>; + interrupts = ; + spi-cs-high; + }; + }; +... diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf8523.txt b/Documentation/devicetree/bindings/rtc/nxp,pcf8523.txt deleted file mode 100644 index 0b1080c60f..0000000000 --- a/Documentation/devicetree/bindings/rtc/nxp,pcf8523.txt +++ /dev/null @@ -1,18 +0,0 @@ -* NXP PCF8523 Real Time Clock - -Required properties: -- compatible: Should contain "nxp,pcf8523". -- reg: I2C address for chip. - -Optional property: -- quartz-load-femtofarads: The capacitive load of the quartz(x-tal), - expressed in femto Farad (fF). Valid values are 7000 and 12500. - Default value (if no value is specified) is 12500fF. 
- -Example: - -pcf8523: rtc@68 { - compatible = "nxp,pcf8523"; - reg = <0x68>; - quartz-load-femtofarads = <7000>; -}; diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf8523.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf8523.yaml new file mode 100644 index 0000000000..d11c8bc16b --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nxp,pcf8523.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rtc/nxp,pcf8523.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP PCF8523 Real Time Clock + +maintainers: + - Sam Ravnborg + +allOf: + - $ref: rtc.yaml# + +properties: + compatible: + const: nxp,pcf8523 + + reg: + maxItems: 1 + + quartz-load-femtofarads: + description: + The capacitive load of the crystal, expressed in femto Farad (fF). + enum: [ 7000, 12500 ] + default: 12500 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + rtc@68 { + compatible = "nxp,pcf8523"; + reg = <0x68>; + quartz-load-femtofarads = <7000>; + }; + }; diff --git a/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt deleted file mode 100644 index 7371f525a6..0000000000 --- a/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt +++ /dev/null @@ -1,17 +0,0 @@ -NXP PCF2123 SPI Real Time Clock - -Required properties: -- compatible: should be: "nxp,pcf2123" - or "microcrystal,rv2123" -- reg: should be the SPI slave chipselect address - -Optional properties: -- spi-cs-high: PCF2123 needs chipselect high - -Example: - -pcf2123: rtc@3 { - compatible = "nxp,pcf2123" - reg = <3> - spi-cs-high; -}; diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 2a65f31ac5..c9e3c5262c 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ 
b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -45,6 +45,8 @@ properties: - isil,isl1208 # Intersil ISL1218 Low Power RTC with Battery Backed SRAM - isil,isl1218 + # SPI-BUS INTERFACE REAL TIME CLOCK MODULE + - maxim,mcp795 # Real Time Clock Module with I2C-Bus - microcrystal,rv3029 # Real Time Clock diff --git a/Documentation/devicetree/bindings/serial/esp,esp32-acm.yaml b/Documentation/devicetree/bindings/serial/esp,esp32-acm.yaml new file mode 100644 index 0000000000..77fbb2c721 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/esp,esp32-acm.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause + +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/esp,esp32-acm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ESP32S3 ACM gadget controller + +maintainers: + - Max Filippov + +description: + Fixed function USB CDC-ACM gadget controller of the Espressif ESP32S3 SoC. + +allOf: + - $ref: serial.yaml# + +properties: + compatible: + const: esp,esp32s3-acm + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + serial@60038000 { + compatible = "esp,esp32s3-acm"; + reg = <0x60038000 0x1000>; + interrupts = <96 3 0>; + }; diff --git a/Documentation/devicetree/bindings/serial/esp,esp32-uart.yaml b/Documentation/devicetree/bindings/serial/esp,esp32-uart.yaml new file mode 100644 index 0000000000..2a80ca997a --- /dev/null +++ b/Documentation/devicetree/bindings/serial/esp,esp32-uart.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause + +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/esp,esp32-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ESP32xx UART controllers + +maintainers: + - Max Filippov + +description: + ESP32 UART controller is a part of the ESP32 SoC. + ESP32S3 UART controller is a part of the ESP32S3 SoC. 
+ Both SoCs are produced by Espressif Systems Co. Ltd. + +allOf: + - $ref: serial.yaml# + +properties: + compatible: + enum: + - esp,esp32-uart + - esp,esp32s3-uart + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + serial@60000000 { + compatible = "esp,esp32s3-uart"; + reg = <0x60000000 0x80>; + interrupts = <27 1 0>; + clocks = <&serial_clk>; + }; diff --git a/Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml b/Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml index 40414247d6..8303555304 100644 --- a/Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml +++ b/Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml @@ -70,6 +70,8 @@ properties: interrupts: maxItems: 1 + wakeup-source: true + fsl,dte-mode: $ref: /schemas/types.yaml#/definitions/flag description: | diff --git a/Documentation/devicetree/bindings/serial/fsl-mxs-auart.yaml b/Documentation/devicetree/bindings/serial/fsl-mxs-auart.yaml index 6a400a5e6f..da032effff 100644 --- a/Documentation/devicetree/bindings/serial/fsl-mxs-auart.yaml +++ b/Documentation/devicetree/bindings/serial/fsl-mxs-auart.yaml @@ -14,10 +14,13 @@ allOf: properties: compatible: - enum: - - fsl,imx23-auart - - fsl,imx28-auart - - alphascale,asm9260-auart + oneOf: + - const: fsl,imx23-auart + - const: alphascale,asm9260-auart + - items: + - enum: + - fsl,imx28-auart + - const: fsl,imx23-auart reg: maxItems: 1 @@ -82,7 +85,7 @@ examples: }; auart0: serial@8006a000 { - compatible = "fsl,imx28-auart"; + compatible = "fsl,imx28-auart", "fsl,imx23-auart"; reg = <0x8006a000 0x2000>; interrupts = <112>; dmas = <&dma_apbx 8>, <&dma_apbx 9>; diff --git a/Documentation/devicetree/bindings/serial/maxim,max310x.txt b/Documentation/devicetree/bindings/serial/maxim,max310x.txt deleted file mode 100644 index 79e10a05a9..0000000000 --- 
a/Documentation/devicetree/bindings/serial/maxim,max310x.txt +++ /dev/null @@ -1,48 +0,0 @@ -* Maxim MAX310X advanced Universal Asynchronous Receiver-Transmitter (UART) - -Required properties: -- compatible: Should be one of the following: - - "maxim,max3107" for Maxim MAX3107, - - "maxim,max3108" for Maxim MAX3108, - - "maxim,max3109" for Maxim MAX3109, - - "maxim,max14830" for Maxim MAX14830. -- reg: SPI chip select number. -- interrupts: Specifies the interrupt source of the parent interrupt - controller. The format of the interrupt specifier depends on the - parent interrupt controller. -- clocks: phandle to the IC source clock. -- clock-names: Should be "xtal" if clock is an external crystal or - "osc" if an external clock source is used. - -Optional properties: -- gpio-controller: Marks the device node as a GPIO controller. -- #gpio-cells: Should be two. The first cell is the GPIO number and - the second cell is used to specify the GPIO polarity: - 0 = active high, - 1 = active low. 
- -Example: - -/ { - clocks { - spi_uart_clk: osc_max14830 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <3686400>; - }; - - }; -}; - -&spi0 { - max14830: max14830@0 { - compatible = "maxim,max14830"; - reg = <0>; - clocks = <&spi_uart_clk>; - clock-names = "osc"; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_LEVEL_LOW>; - gpio-controller; - #gpio-cells = <2>; - }; -}; diff --git a/Documentation/devicetree/bindings/serial/maxim,max310x.yaml b/Documentation/devicetree/bindings/serial/maxim,max310x.yaml new file mode 100644 index 0000000000..889eeaca64 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/maxim,max310x.yaml @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/maxim,max310x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Maxim MAX310X Advanced Universal Asynchronous Receiver-Transmitter (UART) + +maintainers: + - Hugo Villeneuve + +properties: + compatible: + enum: + - maxim,max3107 + - maxim,max3108 + - maxim,max3109 + - maxim,max14830 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + enum: + - xtal # External crystal + - osc # External clock source + + gpio-controller: true + + "#gpio-cells": + const: 2 + + gpio-line-names: + minItems: 1 + maxItems: 16 + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: /schemas/serial/serial.yaml# + - $ref: /schemas/serial/rs485.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + serial@2c { + compatible = "maxim,max3107"; + reg = <0x2c>; + clocks = <&xtal4m>; + clock-names = "xtal"; + interrupt-parent = <&gpio3>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; + gpio-controller; + #gpio-cells = <2>; + }; + }; diff --git 
a/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml b/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml index 04d55fecf4..a5d67563cd 100644 --- a/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml +++ b/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml @@ -91,11 +91,6 @@ properties: - description: range upper bound - description: adjustment (in permyriad, i.e. 0.01%) -allOf: - - $ref: serial.yaml - -unevaluatedProperties: false - required: - compatible - reg @@ -106,6 +101,11 @@ required: - dmas - dma-names +allOf: + - $ref: serial.yaml + +unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt b/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt deleted file mode 100644 index 1a7e4bff04..0000000000 --- a/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt +++ /dev/null @@ -1,118 +0,0 @@ -* NXP SC16IS7xx advanced Universal Asynchronous Receiver-Transmitter (UART) -* i2c as bus - -Required properties: -- compatible: Should be one of the following: - - "nxp,sc16is740" for NXP SC16IS740, - - "nxp,sc16is741" for NXP SC16IS741, - - "nxp,sc16is750" for NXP SC16IS750, - - "nxp,sc16is752" for NXP SC16IS752, - - "nxp,sc16is760" for NXP SC16IS760, - - "nxp,sc16is762" for NXP SC16IS762. -- reg: I2C address of the SC16IS7xx device. -- interrupts: Should contain the UART interrupt -- clocks: Reference to the IC source clock. - OR (when there is no clock provider visible to the platform) -- clock-frequency: The source clock frequency for the IC. - -Optional properties: -- gpio-controller: Marks the device node as a GPIO controller. -- #gpio-cells: Should be two. The first cell is the GPIO number and - the second cell is used to specify the GPIO polarity: - 0 = active high, - 1 = active low. -- irda-mode-ports: An array that lists the indices of the port that - should operate in IrDA mode. 
-- nxp,modem-control-line-ports: An array that lists the indices of the port that - should have shared GPIO lines configured as - modem control lines. - -Example: - sc16is750: sc16is750@51 { - compatible = "nxp,sc16is750"; - reg = <0x51>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - gpio-controller; - #gpio-cells = <2>; - }; - - sc16is752: sc16is752@53 { - compatible = "nxp,sc16is752"; - reg = <0x53>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - nxp,modem-control-line-ports = <1>; /* Port 1 as modem control lines */ - gpio-controller; /* Port 0 as GPIOs */ - #gpio-cells = <2>; - }; - - sc16is752: sc16is752@54 { - compatible = "nxp,sc16is752"; - reg = <0x54>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - nxp,modem-control-line-ports = <0 1>; /* Ports 0 and 1 as modem control lines */ - }; - -* spi as bus - -Required properties: -- compatible: Should be one of the following: - - "nxp,sc16is740" for NXP SC16IS740, - - "nxp,sc16is741" for NXP SC16IS741, - - "nxp,sc16is750" for NXP SC16IS750, - - "nxp,sc16is752" for NXP SC16IS752, - - "nxp,sc16is760" for NXP SC16IS760, - - "nxp,sc16is762" for NXP SC16IS762. -- reg: SPI chip select number. -- interrupts: Specifies the interrupt source of the parent interrupt - controller. The format of the interrupt specifier depends on the - parent interrupt controller. -- clocks: phandle to the IC source clock. - -Optional properties: -- gpio-controller: Marks the device node as a GPIO controller. -- #gpio-cells: Should be two. The first cell is the GPIO number and - the second cell is used to specify the GPIO polarity: - 0 = active high, - 1 = active low. -- irda-mode-ports: An array that lists the indices of the port that - should operate in IrDA mode. 
-- nxp,modem-control-line-ports: An array that lists the indices of the port that - should have shared GPIO lines configured as - modem control lines. - -Example: - sc16is750: sc16is750@0 { - compatible = "nxp,sc16is750"; - reg = <0>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - gpio-controller; - #gpio-cells = <2>; - }; - - sc16is752: sc16is752@1 { - compatible = "nxp,sc16is752"; - reg = <1>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - nxp,modem-control-line-ports = <1>; /* Port 1 as modem control lines */ - gpio-controller; /* Port 0 as GPIOs */ - #gpio-cells = <2>; - }; - - sc16is752: sc16is752@2 { - compatible = "nxp,sc16is752"; - reg = <2>; - clocks = <&clk20m>; - interrupt-parent = <&gpio3>; - interrupts = <7 IRQ_TYPE_EDGE_FALLING>; - nxp,modem-control-line-ports = <0 1>; /* Ports 0 and 1 as modem control lines */ - }; diff --git a/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml b/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml new file mode 100644 index 0000000000..5dec15b7e7 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/nxp,sc16is7xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP SC16IS7xx Advanced Universal Asynchronous Receiver-Transmitter (UART) + +maintainers: + - Hugo Villeneuve + +properties: + compatible: + enum: + - nxp,sc16is740 + - nxp,sc16is741 + - nxp,sc16is750 + - nxp,sc16is752 + - nxp,sc16is760 + - nxp,sc16is762 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + description: + When there is no clock provider visible to the platform, this + is the source crystal or external clock frequency for the IC in Hz. 
+ minimum: 1 + maximum: 80000000 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + gpio-line-names: + minItems: 1 + maxItems: 8 + + irda-mode-ports: + description: | + An array that lists the indices of the port that should operate in IrDA + mode: + 0: port A + 1: port B + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 2 + items: + minimum: 0 + maximum: 1 + + nxp,modem-control-line-ports: + description: | + An array that lists the indices of the port that should have shared GPIO + lines configured as modem control lines: + 0: port A + 1: port B + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 2 + items: + minimum: 0 + maximum: 1 + +required: + - compatible + - reg + - interrupts + +allOf: + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - $ref: /schemas/serial/serial.yaml# + - $ref: /schemas/serial/rs485.yaml# + +oneOf: + - required: + - clocks + - required: + - clock-frequency + +unevaluatedProperties: false + +examples: + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + serial@51 { + compatible = "nxp,sc16is750"; + reg = <0x51>; + clocks = <&clk20m>; + interrupt-parent = <&gpio3>; + interrupts = <7 IRQ_TYPE_EDGE_FALLING>; + gpio-controller; + #gpio-cells = <2>; + }; + + serial@53 { + compatible = "nxp,sc16is752"; + reg = <0x53>; + clocks = <&clk20m>; + interrupt-parent = <&gpio3>; + interrupts = <7 IRQ_TYPE_EDGE_FALLING>; + nxp,modem-control-line-ports = <1>; /* Port 1 as modem control lines */ + gpio-controller; /* Port 0 as GPIOs */ + #gpio-cells = <2>; + }; + + serial@54 { + compatible = "nxp,sc16is752"; + reg = <0x54>; + clocks = <&clk20m>; + interrupt-parent = <&gpio3>; + interrupts = <7 IRQ_TYPE_EDGE_FALLING>; + nxp,modem-control-line-ports = <0 1>; /* Ports 0 and 1 as modem control lines */ + }; + }; diff --git a/Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml b/Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml index 
a052aaef21..ea6abfe2d9 100644 --- a/Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml +++ b/Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml @@ -40,11 +40,11 @@ required: - interrupts - reg -unevaluatedProperties: false - allOf: - $ref: /schemas/serial/serial.yaml# +unevaluatedProperties: false + examples: - | serial@a9c00000 { diff --git a/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml b/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml index 484b9a51f6..ee52bf8e89 100644 --- a/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml +++ b/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml @@ -78,8 +78,6 @@ required: - interrupts - reg -unevaluatedProperties: false - allOf: - $ref: /schemas/serial/serial.yaml# @@ -97,6 +95,8 @@ allOf: reg: maxItems: 1 +unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/renesas,em-uart.yaml b/Documentation/devicetree/bindings/serial/renesas,em-uart.yaml index 3fc2601f13..89f1eb0f2c 100644 --- a/Documentation/devicetree/bindings/serial/renesas,em-uart.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,em-uart.yaml @@ -38,6 +38,13 @@ properties: - const: sclk - const: pclk +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + allOf: - $ref: serial.yaml# @@ -53,13 +60,6 @@ allOf: clock-names: minItems: 2 -required: - - compatible - - reg - - interrupts - - clocks - - clock-names - unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/serial/renesas,hscif.yaml b/Documentation/devicetree/bindings/serial/renesas,hscif.yaml index 1c7f1276ae..2046e2dc0a 100644 --- a/Documentation/devicetree/bindings/serial/renesas,hscif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,hscif.yaml @@ -111,8 +111,6 @@ required: - clock-names - power-domains -unevaluatedProperties: false - if: properties: compatible: @@ -125,6 +123,8 @@ then: required: - resets 
+unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index 99030fc18c..4610a5bd58 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -79,6 +79,7 @@ properties: - enum: - renesas,scif-r9a07g043 # RZ/G2UL and RZ/Five - renesas,scif-r9a07g054 # RZ/V2L + - renesas,scif-r9a08g045 # RZ/G3S - const: renesas,scif-r9a07g044 # RZ/G2{L,LC} fallback reg: diff --git a/Documentation/devicetree/bindings/serial/renesas,scifa.yaml b/Documentation/devicetree/bindings/serial/renesas,scifa.yaml index 499507678c..c98657cf46 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scifa.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scifa.yaml @@ -77,8 +77,6 @@ required: - clock-names - power-domains -unevaluatedProperties: false - if: properties: compatible: @@ -89,6 +87,8 @@ then: required: - resets +unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/renesas,scifb.yaml b/Documentation/devicetree/bindings/serial/renesas,scifb.yaml index 810d8a991f..fb695b3111 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scifb.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scifb.yaml @@ -77,8 +77,6 @@ required: - clock-names - power-domains -unevaluatedProperties: false - if: properties: compatible: @@ -89,6 +87,8 @@ then: required: - resets +unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml index 8bd88d5cbb..ac60ab1e35 100644 --- a/Documentation/devicetree/bindings/serial/samsung_uart.yaml +++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml @@ -24,9 +24,6 @@ properties: - enum: - apple,s5l-uart - axis,artpec8-uart - - 
samsung,s3c2410-uart - - samsung,s3c2412-uart - - samsung,s3c2440-uart - samsung,s3c6400-uart - samsung,s5pv210-uart - samsung,exynos4210-uart @@ -86,8 +83,6 @@ required: - interrupts - reg -unevaluatedProperties: false - allOf: - $ref: serial.yaml# @@ -96,7 +91,6 @@ allOf: compatible: contains: enum: - - samsung,s3c2410-uart - samsung,s5pv210-uart then: properties: @@ -128,6 +122,8 @@ allOf: - const: uart - const: clk_uart_baud0 +unevaluatedProperties: false + examples: - | #include diff --git a/Documentation/devicetree/bindings/serial/serial.yaml b/Documentation/devicetree/bindings/serial/serial.yaml index 5727bd549d..65804ca274 100644 --- a/Documentation/devicetree/bindings/serial/serial.yaml +++ b/Documentation/devicetree/bindings/serial/serial.yaml @@ -87,16 +87,8 @@ properties: description: TX FIFO threshold configuration (in bytes). -if: - required: - - uart-has-rtscts -then: - properties: - cts-gpios: false - rts-gpios: false - patternProperties: - "^(bluetooth|gnss|gps|mcu)$": + "^(bluetooth|bluetooth-gnss|gnss|gps|mcu)$": if: type: object then: @@ -136,6 +128,14 @@ patternProperties: required: - compatible +if: + required: + - uart-has-rtscts +then: + properties: + cts-gpios: false + rts-gpios: false + additionalProperties: true examples: diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml index ec888f48ca..e802e25923 100644 --- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,cpm1-scc-qmc.yaml @@ -64,6 +64,7 @@ patternProperties: description: A channel managed by this controller type: object + additionalProperties: false properties: reg: @@ -100,6 +101,32 @@ patternProperties: Channel assigned Rx time-slots within the Rx time-slots routed by the TSA to this cell. 
+ compatible: + items: + - enum: + - fsl,mpc885-scc-qmc-hdlc + - fsl,mpc866-scc-qmc-hdlc + - const: fsl,cpm1-scc-qmc-hdlc + - const: fsl,qmc-hdlc + + fsl,framer: + $ref: /schemas/types.yaml#/definitions/phandle + description: + phandle to the framer node. The framer is in charge of an E1/T1 line + interface connected to the TDM bus. It can be used to get the E1/T1 line + status such as link up/down. + + allOf: + - if: + properties: + compatible: + not: + contains: + const: fsl,qmc-hdlc + then: + properties: + fsl,framer: false + required: - reg - fsl,tx-ts-mask @@ -137,7 +164,7 @@ examples: channel@16 { /* Ch16 : First 4 even TS from all routed from TSA */ reg = <16>; - fsl,mode = "transparent"; + fsl,operational-mode = "transparent"; fsl,reverse-data; fsl,tx-ts-mask = <0x00000000 0x000000aa>; fsl,rx-ts-mask = <0x00000000 0x000000aa>; @@ -146,7 +173,7 @@ examples: channel@17 { /* Ch17 : First 4 odd TS from all routed from TSA */ reg = <17>; - fsl,mode = "transparent"; + fsl,operational-mode = "transparent"; fsl,reverse-data; fsl,tx-ts-mask = <0x00000000 0x00000055>; fsl,rx-ts-mask = <0x00000000 0x00000055>; @@ -154,9 +181,13 @@ examples: channel@19 { /* Ch19 : 8 TS (TS 8..15) from all routed from TSA */ + compatible = "fsl,mpc885-scc-qmc-hdlc", + "fsl,cpm1-scc-qmc-hdlc", + "fsl,qmc-hdlc"; reg = <19>; - fsl,mode = "hdlc"; + fsl,operational-mode = "hdlc"; fsl,tx-ts-mask = <0x00000000 0x0000ff00>; fsl,rx-ts-mask = <0x00000000 0x0000ff00>; + fsl,framer = <&framer>; }; }; diff --git a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml index f0fa92b04b..3b212f26ab 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml @@ -20,6 +20,7 @@ properties: items: - enum: - mediatek,mt7986-wo-ccif + - mediatek,mt7988-wo-ccif - const: syscon reg: diff --git 
a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml index f21eb907ee..7eda63d568 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml @@ -22,6 +22,7 @@ properties: compatible: enum: - mediatek,mt8183-svs + - mediatek,mt8188-svs - mediatek,mt8192-svs reg: diff --git a/Documentation/devicetree/bindings/soc/nuvoton/nuvoton,gfxi.yaml b/Documentation/devicetree/bindings/soc/nuvoton/nuvoton,gfxi.yaml new file mode 100644 index 0000000000..0222a43977 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/nuvoton/nuvoton,gfxi.yaml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/nuvoton/nuvoton,gfxi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Graphics Core Information block in Nuvoton SoCs + +maintainers: + - Joseph Liu + - Marvin Lin + +description: + The Graphics Core Information (GFXI) are a block of registers in Nuvoton SoCs + that analyzes Graphics core behavior and provides information in registers. 
+ +properties: + compatible: + items: + - enum: + - nuvoton,npcm750-gfxi + - nuvoton,npcm845-gfxi + - const: syscon + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + gfxi: gfxi@e000 { + compatible = "nuvoton,npcm750-gfxi", "syscon"; + reg = <0xe000 0x100>; + }; diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml index 8a4b7ba3aa..7b031ef096 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml @@ -52,6 +52,8 @@ properties: iommus: maxItems: 1 + dma-coherent: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml index bceb479f74..422921cf1f 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml @@ -35,6 +35,12 @@ properties: '#size-cells': const: 0 + orientation-gpios: + description: Array of input gpios for the Type-C connector orientation indication. + The GPIO indication is used to detect the orientation of the Type-C connector. + The array should contain a gpio entry for each PMIC Glink connector, in reg order. + It is defined that GPIO active level means "CC2" or Reversed/Flipped orientation. 
+ patternProperties: '^connector@\d$': $ref: /schemas/connector/usb-connector.yaml# @@ -44,6 +50,19 @@ patternProperties: required: - compatible +allOf: + - if: + not: + properties: + compatible: + contains: + enum: + - qcom,sm8450-pmic-glink + - qcom,sm8550-pmic-glink + then: + properties: + orientation-gpios: false + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/soc/renesas/renesas,rzg2l-sysc.yaml b/Documentation/devicetree/bindings/soc/renesas/renesas,rzg2l-sysc.yaml index e52e176d8c..4386b2c3fa 100644 --- a/Documentation/devicetree/bindings/soc/renesas/renesas,rzg2l-sysc.yaml +++ b/Documentation/devicetree/bindings/soc/renesas/renesas,rzg2l-sysc.yaml @@ -23,6 +23,7 @@ properties: - renesas,r9a07g043-sysc # RZ/G2UL and RZ/Five - renesas,r9a07g044-sysc # RZ/G2{L,LC} - renesas,r9a07g054-sysc # RZ/V2L + - renesas,r9a08g045-sysc # RZ/G3S reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/soc/renesas/renesas.yaml b/Documentation/devicetree/bindings/soc/renesas/renesas.yaml index 53b95f348f..16ca3ff7b1 100644 --- a/Documentation/devicetree/bindings/soc/renesas/renesas.yaml +++ b/Documentation/devicetree/bindings/soc/renesas/renesas.yaml @@ -302,7 +302,7 @@ properties: - description: R-Car E3 (R8A77990) items: - enum: - - renesas,ebisu # Ebisu (RTP0RC77990SEB0010S) + - renesas,ebisu # Ebisu (RTP0RC77990SEB0010S), Ebisu-4D (RTP0RC77990SEB0020S) - const: renesas,r8a77990 - description: R-Car D3 (R8A77995) @@ -335,6 +335,13 @@ properties: - const: renesas,spider-cpu - const: renesas,r8a779f0 + - description: R-Car S4-8 (R8A779F4) + items: + - enum: + - renesas,s4sk # R-Car S4 Starter Kit board (Y-ASK-RCAR-S4-1000BASE-T#WS12) + - const: renesas,r8a779f4 + - const: renesas,r8a779f0 + - description: R-Car V4H (R8A779G0) items: - enum: @@ -474,6 +481,25 @@ properties: - renesas,rzv2mevk2 # RZ/V2M Eval Board v2.0 - const: renesas,r9a09g011 + - description: RZ/G3S (R9A08G045) + items: + - enum: + - renesas,r9a08g045s33 # PCIe 
support + - const: renesas,r9a08g045 + + - description: RZ/G3S SMARC Module (SoM) + items: + - const: renesas,rzg3s-smarcm # RZ/G3S SMARC Module (SoM) + - const: renesas,r9a08g045s33 # PCIe support + - const: renesas,r9a08g045 + + - description: RZ SMARC Carrier-II Evaluation Kit + items: + - const: renesas,smarc2-evk # RZ SMARC Carrier-II EVK + - const: renesas,rzg3s-smarcm # RZ/G3S SMARC SoM + - const: renesas,r9a08g045s33 # PCIe support + - const: renesas,r9a08g045 + additionalProperties: true ... diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml index e4fa6a07b4..1309bf5ae0 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml @@ -233,6 +233,7 @@ allOf: - rockchip,rk3399-grf - rockchip,rk3399-pmugrf - rockchip,rk3568-pmugrf + - rockchip,rk3588-pmugrf - rockchip,rv1108-grf - rockchip,rv1108-pmugrf diff --git a/Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml b/Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml new file mode 100644 index 0000000000..5f97d9ff17 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/sti/st,sti-syscon.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: STMicroelectronics STi platform sysconfig + +maintainers: + - Patrice Chotard + +description: | + Binding for the various sysconfig nodes used within the STi + platform device-tree to point to some common configuration + registers used by other nodes. 
+ +properties: + compatible: + items: + - enum: + - st,stih407-core-syscfg + - st,stih407-flash-syscfg + - st,stih407-front-syscfg + - st,stih407-lpm-syscfg + - st,stih407-rear-syscfg + - st,stih407-sbc-reg-syscfg + - st,stih407-sbc-syscfg + - const: syscon + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + syscfg_sbc: syscon@9620000 { + compatible = "st,stih407-sbc-syscfg", "syscon"; + reg = <0x9620000 0x1000>; + }; + +... diff --git a/Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml new file mode 100644 index 0000000000..b86f6f53ca --- /dev/null +++ b/Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml @@ -0,0 +1,416 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/tegra/nvidia,tegra20-pmc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Tegra Power Management Controller (PMC) + +maintainers: + - Thierry Reding + - Jonathan Hunter + +properties: + compatible: + enum: + - nvidia,tegra20-pmc + - nvidia,tegra30-pmc + - nvidia,tegra114-pmc + - nvidia,tegra124-pmc + - nvidia,tegra210-pmc + + reg: + maxItems: 1 + + clock-names: + items: + # Tegra clock of the same name + - const: pclk + # 32 KHz clock input + - const: clk32k_in + + clocks: + maxItems: 2 + + '#clock-cells': + const: 1 + description: | + Tegra PMC has clk_out_1, clk_out_2, and clk_out_3. PMC also has blink + control which allows 32Khz clock output to Tegra blink pad. + + Consumer of PMC clock should specify the desired clock by having the + clock ID in its "clocks" phandle cell with PMC clock provider. See + include/dt-bindings/soc/tegra-pmc.h for the list of Tegra PMC clock IDs. + + '#interrupt-cells': + const: 2 + description: Specifies number of cells needed to encode an interrupt + source. 
+ + interrupt-controller: true + + nvidia,invert-interrupt: + $ref: /schemas/types.yaml#/definitions/flag + description: Inverts the PMU interrupt signal. The PMU is an external Power + Management Unit, whose interrupt output signal is fed into the PMC. This + signal is optionally inverted, and then fed into the ARM GIC. The PMC is + not involved in the detection or handling of this interrupt signal, + merely its inversion. + + nvidia,core-power-req-active-high: + $ref: /schemas/types.yaml#/definitions/flag + description: core power request active-high + + nvidia,sys-clock-req-active-high: + $ref: /schemas/types.yaml#/definitions/flag + description: system clock request active-high + + nvidia,combined-power-req: + $ref: /schemas/types.yaml#/definitions/flag + description: combined power request for CPU and core + + nvidia,cpu-pwr-good-en: + $ref: /schemas/types.yaml#/definitions/flag + description: CPU power good signal from external PMIC to PMC is enabled + + nvidia,suspend-mode: + $ref: /schemas/types.yaml#/definitions/uint32 + description: the suspend mode that the platform should use + oneOf: + - description: LP0, CPU + Core voltage off and DRAM in self-refresh + const: 0 + - description: LP1, CPU voltage off and DRAM in self-refresh + const: 1 + - description: LP2, CPU voltage off + const: 2 + + nvidia,cpu-pwr-good-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: CPU power good time in microseconds + + nvidia,cpu-pwr-off-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: CPU power off time in microseconds + + nvidia,core-pwr-good-time: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: core power good time in microseconds + items: + - description: oscillator stable time + - description: power stable time + + nvidia,core-pwr-off-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: core power off time in microseconds + + nvidia,lp0-vec: + $ref: /schemas/types.yaml#/definitions/uint32-array + 
description: | + Starting address and length of LP0 vector. The LP0 vector contains the + warm boot code that is executed by AVP when resuming from the LP0 state. + The AVP (Audio-Video Processor) is an ARM7 processor and always being + the first boot processor when chip is power on or resume from deep sleep + mode. When the system is resumed from the deep sleep mode, the warm boot + code will restore some PLLs, clocks and then brings up CPU0 for resuming + the system. + items: + - description: starting address of LP0 vector + - description: length of LP0 vector + + core-supply: + description: phandle to voltage regulator connected to the SoC core power + rail + + core-domain: + type: object + description: The vast majority of hardware blocks of Tegra SoC belong to a + core power domain, which has a dedicated voltage rail that powers the + blocks. + additionalProperties: false + properties: + operating-points-v2: + description: Should contain level, voltages and opp-supported-hw + property. The supported-hw is a bitfield indicating SoC speedo or + process ID mask. + + "#power-domain-cells": + const: 0 + + required: + - operating-points-v2 + - "#power-domain-cells" + + i2c-thermtrip: + type: object + description: On Tegra30, Tegra114 and Tegra124 if i2c-thermtrip subnode + exists, hardware-triggered thermal reset will be enabled. + additionalProperties: false + properties: + nvidia,i2c-controller-id: + $ref: /schemas/types.yaml#/definitions/uint32 + description: ID of I2C controller to send poweroff command to PMU. + Valid values are described in section 9.2.148 "APBDEV_PMC_SCRATCH53_0" + of the Tegra K1 Technical Reference Manual. 
+ + nvidia,bus-addr: + $ref: /schemas/types.yaml#/definitions/uint32 + description: bus address of the PMU on the I2C bus + + nvidia,reg-addr: + $ref: /schemas/types.yaml#/definitions/uint32 + description: PMU I2C register address to issue poweroff command + + nvidia,reg-data: + $ref: /schemas/types.yaml#/definitions/uint32 + description: power-off command to write to PMU + + nvidia,pinmux-id: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Pinmux used by the hardware when issuing power-off command. + Defaults to 0. Valid values are described in section 12.5.2 "Pinmux + Support" of the Tegra4 Technical Reference Manual. + + required: + - nvidia,i2c-controller-id + - nvidia,bus-addr + - nvidia,reg-addr + - nvidia,reg-data + + powergates: + type: object + additionalProperties: false + description: | + This node contains a hierarchy of power domain nodes, which should match + the powergates on the Tegra SoC. Each powergate node represents a power- + domain on the Tegra SoC that can be power-gated by the Tegra PMC. + + Hardware blocks belonging to a power domain should contain "power-domains" + property that is a phandle pointing to corresponding powergate node. + + The name of the powergate node should be one of the below. Note that not + every powergate is applicable to all Tegra devices and the following list + shows which powergates are applicable to which devices. + + Please refer to Tegra TRM for more details on the powergate nodes to use + for each power-gate block inside Tegra.
+ + Name Description Devices Applicable + -------------------------------------------------------------- + 3d 3D Graphics Tegra20/114/124/210 + 3d0 3D Graphics 0 Tegra30 + 3d1 3D Graphics 1 Tegra30 + aud Audio Tegra210 + dfd Debug Tegra210 + dis Display A Tegra114/124/210 + disb Display B Tegra114/124/210 + heg 2D Graphics Tegra30/114/124/210 + iram Internal RAM Tegra124/210 + mpe MPEG Encode All + nvdec NVIDIA Video Decode Engine Tegra210 + nvjpg NVIDIA JPEG Engine Tegra210 + pcie PCIE Tegra20/30/124/210 + sata SATA Tegra30/124/210 + sor Display interfaces Tegra124/210 + ve2 Video Encode Engine 2 Tegra210 + venc Video Encode Engine All + vdec Video Decode Engine Tegra20/30/114/124 + vic Video Imaging Compositor Tegra124/210 + xusba USB Partition A Tegra114/124/210 + xusbb USB Partition B Tegra114/124/210 + xusbc USB Partition C Tegra114/124/210 + + patternProperties: + "^[a-z0-9]+$": + type: object + additionalProperties: false + properties: + clocks: + minItems: 1 + maxItems: 10 + + resets: + minItems: 1 + maxItems: 8 + + power-domains: + maxItems: 1 + + '#power-domain-cells': + const: 0 + description: Must be 0. + + required: + - clocks + - resets + - '#power-domain-cells' + + pinmux: + type: object + additionalProperties: + type: object + description: | + This is a pad configuration node. On Tegra SoCs a pad is a set of pins + which are configured as a group. The pin grouping is a fixed attribute + of the hardware. The PMC can be used to set pad power state and + signaling voltage. A pad can be either in active or power down mode. + The support for power state and signaling voltage configuration varies + depending on the pad in question. 3.3V and 1.8V signaling voltages are + supported on pins where software controllable signaling voltage + switching is available. + + The pad configuration state nodes are placed under the pmc node and + they are referred to by the pinctrl client properties. 
For more + information see: + + Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + + The pad name should be used as the value of the pins property in pin + configuration nodes. + + The following pads are present on Tegra124 and Tegra132: + + audio, bb, cam, comp, csia, csb, cse, dsi, dsib, dsic, dsid, hdmi, + hsic, hv, lvds, mipi-bias, nand, pex-bias, pex-clk1, pex-clk2, + pex-cntrl, sdmmc1, sdmmc3, sdmmc4, sys_ddc, uart, usb0, usb1, usb2, + usb_bias + + The following pads are present on Tegra210: + + audio, audio-hv, cam, csia, csib, csic, csid, csie, csif, dbg, + debug-nonao, dmic, dp, dsi, dsib, dsic, dsid, emmc, emmc2, gpio, + hdmi, hsic, lvds, mipi-bias, pex-bias, pex-clk1, pex-clk2, pex-cntrl, + sdmmc1, sdmmc3, spi, spi-hv, uart, usb0, usb1, usb2, usb3, usb-bias + additionalProperties: false + properties: + pins: + $ref: /schemas/types.yaml#/definitions/string-array + description: Must contain name of the pad(s) to be configured. + + low-power-enable: + $ref: /schemas/types.yaml#/definitions/flag + description: Configure the pad into power down mode. + + low-power-disable: + $ref: /schemas/types.yaml#/definitions/flag + description: Configure the pad into active mode. + + power-source: + $ref: /schemas/types.yaml#/definitions/uint32 + description: | + Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 or + TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages. The + values are defined in: + + include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h + + Power state can be configured on all Tegra124 and Tegra132 pads. + None of the Tegra124 or Tegra132 pads support signaling voltage + switching. All of the listed Tegra210 pads except pex-cntrl support + power state configuration. 
Signaling voltage switching is supported + on the following Tegra210 pads: + + audio, audio-hv, cam, dbg, dmic, gpio, pex-cntrl, sdmmc1, sdmmc3, + spi, spi-hv, uart + + required: + - pins + +required: + - compatible + - reg + - clock-names + - clocks + - '#clock-cells' + +allOf: + - if: + properties: + compatible: + contains: + const: nvidia,tegra124-pmc + then: + properties: + pinmux: + additionalProperties: + type: object + properties: + pins: + items: + enum: [ audio, bb, cam, comp, csia, csb, cse, dsi, dsib, + dsic, dsid, hdmi, hsic, hv, lvds, mipi-bias, nand, + pex-bias, pex-clk1, pex-clk2, pex-cntrl, sdmmc1, + sdmmc3, sdmmc4, sys_ddc, uart, usb0, usb1, usb2, + usb_bias ] + + - if: + properties: + compatible: + contains: + const: nvidia,tegra210-pmc + then: + properties: + pinmux: + additionalProperties: + type: object + properties: + pins: + items: + enum: [ audio, audio-hv, cam, csia, csib, csic, csid, csie, + csif, dbg, debug-nonao, dmic, dp, dsi, dsib, dsic, + dsid, emmc, emmc2, gpio, hdmi, hsic, lvds, mipi-bias, + pex-bias, pex-clk1, pex-clk2, pex-cntrl, sdmmc1, + sdmmc3, spi, spi-hv, uart, usb0, usb1, usb2, usb3, + usb-bias ] + +additionalProperties: false + +dependencies: + "nvidia,suspend-mode": ["nvidia,core-pwr-off-time", "nvidia,cpu-pwr-off-time"] + "nvidia,core-pwr-off-time": ["nvidia,core-pwr-good-time"] + "nvidia,cpu-pwr-off-time": ["nvidia,cpu-pwr-good-time"] + +examples: + - | + #include + #include + #include + + pmc@7000e400 { + compatible = "nvidia,tegra210-pmc"; + reg = <0x7000e400 0x400>; + core-supply = <&regulator>; + clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; + clock-names = "pclk", "clk32k_in"; + #clock-cells = <1>; + + nvidia,invert-interrupt; + nvidia,suspend-mode = <0>; + nvidia,cpu-pwr-good-time = <0>; + nvidia,cpu-pwr-off-time = <0>; + nvidia,core-pwr-good-time = <4587 3876>; + nvidia,core-pwr-off-time = <39065>; + nvidia,core-power-req-active-high; + nvidia,sys-clock-req-active-high; + + pd_core: core-domain { +
operating-points-v2 = <&core_opp_table>; + #power-domain-cells = <0>; + }; + + powergates { + pd_audio: aud { + clocks = <&tegra_car TEGRA210_CLK_APE>, + <&tegra_car TEGRA210_CLK_APB2APE>; + resets = <&tegra_car 198>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_xusbss: xusba { + clocks = <&tegra_car TEGRA210_CLK_XUSB_SS>; + resets = <&tegra_car TEGRA210_CLK_XUSB_SS>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml index fa9f9a8533..60b5e3fd11 100644 --- a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml +++ b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml @@ -13,19 +13,17 @@ select: false definitions: port-base: - $ref: /schemas/graph.yaml#/$defs/port-base + allOf: + - $ref: /schemas/graph.yaml#/$defs/port-base + - $ref: /schemas/sound/dai-params.yaml# properties: - convert-rate: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-rate - convert-channels: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-channels - convert-sample-format: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-format mclk-fs: $ref: simple-card.yaml#/definitions/mclk-fs endpoint-base: - $ref: /schemas/graph.yaml#/$defs/endpoint-base + allOf: + - $ref: /schemas/graph.yaml#/$defs/endpoint-base + - $ref: /schemas/sound/dai-params.yaml# properties: mclk-fs: $ref: simple-card.yaml#/definitions/mclk-fs @@ -68,12 +66,6 @@ definitions: - pdm - msb - lsb - convert-rate: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-rate - convert-channels: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-channels - convert-sample-format: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-format dai-tdm-slot-num: description: Number of slots in use. 
diff --git a/Documentation/devicetree/bindings/sound/audio-graph.yaml b/Documentation/devicetree/bindings/sound/audio-graph.yaml index ed31e04ff6..71f52f7e55 100644 --- a/Documentation/devicetree/bindings/sound/audio-graph.yaml +++ b/Documentation/devicetree/bindings/sound/audio-graph.yaml @@ -9,6 +9,9 @@ title: Audio Graph maintainers: - Kuninori Morimoto +allOf: + - $ref: /schemas/sound/dai-params.yaml# + properties: dais: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -30,12 +33,6 @@ properties: widget ("Microphone", "Line", "Headphone", "Speaker"), the second being the machine specific name for the widget. $ref: /schemas/types.yaml#/definitions/non-unique-string-array - convert-rate: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-rate - convert-channels: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-channels - convert-sample-format: - $ref: /schemas/sound/dai-params.yaml#/$defs/dai-sample-format pa-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/awinic,aw87390.yaml b/Documentation/devicetree/bindings/sound/awinic,aw87390.yaml new file mode 100644 index 0000000000..ba9d8767c5 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/awinic,aw87390.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/awinic,aw87390.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Awinic Aw87390 Audio Amplifier + +maintainers: + - Weidong Wang + +description: + The awinic aw87390 is specifically designed to improve + the musical output dynamic range, enhance the overall + sound quality, which is a new high efficiency, low + noise, constant large volume, 6th Smart K audio amplifier.
+ +allOf: + - $ref: dai-common.yaml# + +properties: + compatible: + const: awinic,aw87390 + + reg: + maxItems: 1 + + "#sound-dai-cells": + const: 0 + + awinic,audio-channel: + description: + It is used to distinguish multiple PA devices, so that different + configurations can be loaded to different PA devices + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + +required: + - compatible + - reg + - "#sound-dai-cells" + - awinic,audio-channel + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + audio-codec@58 { + compatible = "awinic,aw87390"; + reg = <0x58>; + #sound-dai-cells = <0>; + awinic,audio-channel = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/awinic,aw88395.yaml b/Documentation/devicetree/bindings/sound/awinic,aw88395.yaml index 4051c2538c..ac5f2e0f42 100644 --- a/Documentation/devicetree/bindings/sound/awinic,aw88395.yaml +++ b/Documentation/devicetree/bindings/sound/awinic,aw88395.yaml @@ -14,14 +14,12 @@ description: digital Smart K audio amplifier with an integrated 10.25V smart boost convert. 
-allOf: - - $ref: dai-common.yaml# - properties: compatible: enum: - awinic,aw88395 - awinic,aw88261 + - awinic,aw88399 reg: maxItems: 1 @@ -32,11 +30,36 @@ properties: reset-gpios: maxItems: 1 + awinic,audio-channel: + description: + It is used to distinguish multiple PA devices, so that different + configurations can be loaded to different PA devices + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 7 + + awinic,sync-flag: + description: + Flag bit used to keep the phase synchronized in the case of multiple PA + $ref: /schemas/types.yaml#/definitions/flag + required: - compatible - reg - '#sound-dai-cells' - - reset-gpios + - awinic,audio-channel + +allOf: + - $ref: dai-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - awinic,aw88261 + then: + properties: + reset-gpios: false unevaluatedProperties: false @@ -51,5 +74,7 @@ examples: reg = <0x34>; #sound-dai-cells = <0>; reset-gpios = <&gpio 10 GPIO_ACTIVE_LOW>; + awinic,audio-channel = <0>; + awinic,sync-flag; }; }; diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml index 4118aa54bb..7f9d8c7a63 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml @@ -83,7 +83,7 @@ properties: Current at which the headset micbias sense clamp will engage, 0 to disable. enum: [ 0, 14, 24, 43, 52, 61, 71, 90, 99 ] - default: 0 + default: 14 cirrus,bias-ramp-ms: description: @@ -97,7 +97,7 @@ properties: Time in microseconds the type detection will run for. Long values will cause more audible effects, but give more accurate detection. 
enum: [ 20, 100, 1000, 10000, 50000, 75000, 100000, 200000 ] - default: 10000 + default: 1000 cirrus,button-automute: type: boolean diff --git a/Documentation/devicetree/bindings/sound/dai-params.yaml b/Documentation/devicetree/bindings/sound/dai-params.yaml index f5fb71f9b6..cd85081755 100644 --- a/Documentation/devicetree/bindings/sound/dai-params.yaml +++ b/Documentation/devicetree/bindings/sound/dai-params.yaml @@ -11,15 +11,14 @@ maintainers: select: false -$defs: - - dai-channels: +properties: + convert-channels: description: Number of audio channels used by DAI $ref: /schemas/types.yaml#/definitions/uint32 minimum: 1 maximum: 32 - dai-sample-format: + convert-sample-format: description: Audio sample format used by DAI $ref: /schemas/types.yaml#/definitions/string enum: @@ -29,12 +28,10 @@ $defs: - s24_3le - s32_le - dai-sample-rate: + convert-rate: description: Audio sample rate used by DAI $ref: /schemas/types.yaml#/definitions/uint32 minimum: 8000 maximum: 192000 -properties: {} - additionalProperties: true diff --git a/Documentation/devicetree/bindings/sound/dialog,da7219.yaml b/Documentation/devicetree/bindings/sound/dialog,da7219.yaml index eb7d219e2c..19137abdba 100644 --- a/Documentation/devicetree/bindings/sound/dialog,da7219.yaml +++ b/Documentation/devicetree/bindings/sound/dialog,da7219.yaml @@ -89,6 +89,7 @@ properties: da7219_aad: type: object + additionalProperties: false description: Configuration of advanced accessory detection. 
properties: diff --git a/Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml b/Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml index ff5cd92419..b522ed7dcc 100644 --- a/Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml @@ -33,6 +33,7 @@ patternProperties: description: A DAI managed by this controller type: object + additionalProperties: false properties: reg: diff --git a/Documentation/devicetree/bindings/sound/mediatek,mt8188-afe.yaml b/Documentation/devicetree/bindings/sound/mediatek,mt8188-afe.yaml index 90520f8920..77af276ed2 100644 --- a/Documentation/devicetree/bindings/sound/mediatek,mt8188-afe.yaml +++ b/Documentation/devicetree/bindings/sound/mediatek,mt8188-afe.yaml @@ -29,7 +29,7 @@ properties: maxItems: 1 description: | Shared memory region for AFE memif. A "shared-dma-pool". - See ../reserved-memory/reserved-memory.yaml for details. + See dtschema reserved-memory/shared-dma-pool.yaml for details. 
mediatek,topckgen: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml b/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml index 43b3b67bdf..4c8c95057e 100644 --- a/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml +++ b/Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml @@ -17,6 +17,7 @@ properties: enum: - mediatek,mt8188-mt6359-evb - mediatek,mt8188-nau8825 + - mediatek,mt8188-rt5682s audio-routing: description: diff --git a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml index d80083df03..bdf7b09605 100644 --- a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml +++ b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml @@ -17,6 +17,7 @@ properties: enum: - mediatek,mt8186-mt6366-rt1019-rt5682s-sound - mediatek,mt8186-mt6366-rt5682s-max98360-sound + - mediatek,mt8186-mt6366-rt5650-sound mediatek,platform: $ref: /schemas/types.yaml#/definitions/phandle diff --git a/Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml b/Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml new file mode 100644 index 0000000000..df26248573 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/nxp,tfa9879.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP TFA9879 class-D audio amplifier + +maintainers: + - Peter Rosin + +allOf: + - $ref: dai-common.yaml# + +properties: + compatible: + const: nxp,tfa9879 + + reg: + maxItems: 1 + + "#sound-dai-cells": + const: 0 + +required: + - compatible + - reg + - '#sound-dai-cells' + +unevaluatedProperties: false + +examples: + - | + i2c1 { + #address-cells = <1>; + #size-cells = <0>; + 
amplifier@6c { + compatible = "nxp,tfa9879"; + reg = <0x6c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1>; + #sound-dai-cells = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/pcm512x.txt b/Documentation/devicetree/bindings/sound/pcm512x.txt index 3aae3b41bd..77006a4aec 100644 --- a/Documentation/devicetree/bindings/sound/pcm512x.txt +++ b/Documentation/devicetree/bindings/sound/pcm512x.txt @@ -1,12 +1,12 @@ -PCM512x audio CODECs +PCM512x and TAS575x audio CODECs/amplifiers These devices support both I2C and SPI (configured with pin strapping -on the board). +on the board). The TAS575x devices only support I2C. Required properties: - - compatible : One of "ti,pcm5121", "ti,pcm5122", "ti,pcm5141" or - "ti,pcm5142" + - compatible : One of "ti,pcm5121", "ti,pcm5122", "ti,pcm5141", + "ti,pcm5142", "ti,tas5754" or "ti,tas5756" - reg : the I2C address of the device for I2C, the chip select number for SPI. @@ -25,6 +25,7 @@ Optional properties: through <6>. The device will be configured for clock input on the given pll-in pin and PLL output on the given pll-out pin. An external connection from the pll-out pin to the SCLK pin is assumed. 
+ Caution: the TAS575x devices only support gpios 1, 2 and 3 Examples: diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml index 4156981fe0..962701e9eb 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml @@ -13,6 +13,7 @@ properties: compatible: enum: - qcom,sc7280-lpass-tx-macro + - qcom,sm6115-lpass-tx-macro - qcom,sm8250-lpass-tx-macro - qcom,sm8450-lpass-tx-macro - qcom,sm8550-lpass-tx-macro @@ -97,6 +98,23 @@ allOf: - const: dcodec - const: fsgen + - if: + properties: + compatible: + enum: + - qcom,sm6115-lpass-tx-macro + then: + properties: + clocks: + minItems: 4 + maxItems: 4 + clock-names: + items: + - const: mclk + - const: npl + - const: dcodec + - const: fsgen + - if: properties: compatible: diff --git a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml index 262de7a60a..e082a4fe09 100644 --- a/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,sm8250.yaml @@ -25,6 +25,7 @@ properties: - qcom,apq8016-sbc-sndcard - qcom,msm8916-qdsp6-sndcard - qcom,qrb5165-rb5-sndcard + - qcom,sc7180-qdsp6-sndcard - qcom,sc8280xp-sndcard - qcom,sdm845-sndcard - qcom,sm8250-sndcard diff --git a/Documentation/devicetree/bindings/sound/realtek,rt5616.yaml b/Documentation/devicetree/bindings/sound/realtek,rt5616.yaml new file mode 100644 index 0000000000..248320804e --- /dev/null +++ b/Documentation/devicetree/bindings/sound/realtek,rt5616.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/realtek,rt5616.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek rt5616 ALSA SoC audio codec driver + +description: | + Pins on the device (for linking into audio routes) for
RT5616: + + * IN1P + * IN2P + * IN2N + * LOUTL + * LOUTR + * HPOL + * HPOR + +maintainers: + - Bard Liao + +allOf: + - $ref: dai-common.yaml# + +properties: + compatible: + const: realtek,rt5616 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + audio-codec@1b { + compatible = "realtek,rt5616"; + reg = <0x1b>; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/richtek,rtq9128.yaml b/Documentation/devicetree/bindings/sound/richtek,rtq9128.yaml new file mode 100644 index 0000000000..d54686a19a --- /dev/null +++ b/Documentation/devicetree/bindings/sound/richtek,rtq9128.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/richtek,rtq9128.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Richtek RTQ9128 Automotive Audio Power Amplifier + +maintainers: + - ChiYuan Huang + +description: + The RTQ9128 is an ultra-low output noise, high-efficiency, four-channel + class-D audio power amplifier delivering 4x75W into 4 ohm at 10% + THD+N from a 25V supply in automotive applications. + +allOf: + - $ref: dai-common.yaml# + +properties: + compatible: + enum: + - richtek,rtq9128 + + reg: + maxItems: 1 + + enable-gpios: + maxItems: 1 + + richtek,tdm-input-data2-select: + type: boolean + description: + By default, if TDM mode is used, TDM data input will select 'DATA1' pin + as the data source. This option will configure TDM data input source from + 'DATA1' to 'DATA2' pin.
+ + '#sound-dai-cells': + const: 0 + +required: + - compatible + - reg + - '#sound-dai-cells' + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + i2c { + #address-cells = <1>; + #size-cells = <0>; + + speaker@1a { + compatible = "richtek,rtq9128"; + reg = <0x1a>; + enable-gpios = <&gpio 26 GPIO_ACTIVE_HIGH>; + #sound-dai-cells = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/rt5616.txt b/Documentation/devicetree/bindings/sound/rt5616.txt deleted file mode 100644 index 540a4bf252..0000000000 --- a/Documentation/devicetree/bindings/sound/rt5616.txt +++ /dev/null @@ -1,32 +0,0 @@ -RT5616 audio CODEC - -This device supports I2C only. - -Required properties: - -- compatible : "realtek,rt5616". - -- reg : The I2C address of the device. - -Optional properties: - -- clocks: The phandle of the master clock to the CODEC. - -- clock-names: Should be "mclk". - -Pins on the device (for linking into audio routes) for RT5616: - - * IN1P - * IN2P - * IN2N - * LOUTL - * LOUTR - * HPOL - * HPOR - -Example: - -rt5616: codec@1b { - compatible = "realtek,rt5616"; - reg = <0x1b>; -}; diff --git a/Documentation/devicetree/bindings/sound/starfive,jh7110-pwmdac.yaml b/Documentation/devicetree/bindings/sound/starfive,jh7110-pwmdac.yaml new file mode 100644 index 0000000000..e2b4db6aa2 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/starfive,jh7110-pwmdac.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/starfive,jh7110-pwmdac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: StarFive JH7110 PWM-DAC Controller + +description: + The PWM-DAC Controller uses PWM square wave generators plus RC filters to + form a DAC for audio play in StarFive JH7110 SoC. This audio play controller + supports 16 bit audio format, up to 48K sampling frequency, up to left and + right dual channels.
+ +maintainers: + - Hal Feng + +allOf: + - $ref: dai-common.yaml# + +properties: + compatible: + const: starfive,jh7110-pwmdac + + reg: + maxItems: 1 + + clocks: + items: + - description: PWMDAC APB + - description: PWMDAC CORE + + clock-names: + items: + - const: apb + - const: core + + resets: + maxItems: 1 + description: PWMDAC APB + + dmas: + maxItems: 1 + description: TX DMA Channel + + dma-names: + const: tx + + "#sound-dai-cells": + const: 0 + +required: + - compatible + - reg + - clocks + - clock-names + - resets + - dmas + - dma-names + - "#sound-dai-cells" + +additionalProperties: false + +examples: + - | + pwmdac@100b0000 { + compatible = "starfive,jh7110-pwmdac"; + reg = <0x100b0000 0x1000>; + clocks = <&syscrg 157>, + <&syscrg 158>; + clock-names = "apb", "core"; + resets = <&syscrg 96>; + dmas = <&dma 22>; + dma-names = "tx"; + #sound-dai-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/sound/tas5805m.yaml b/Documentation/devicetree/bindings/sound/tas5805m.yaml index 63edf52f06..12c4197427 100644 --- a/Documentation/devicetree/bindings/sound/tas5805m.yaml +++ b/Documentation/devicetree/bindings/sound/tas5805m.yaml @@ -37,6 +37,8 @@ properties: generated from TI's PPC3 tool. $ref: /schemas/types.yaml#/definitions/string +additionalProperties: false + examples: - | i2c { @@ -52,5 +54,4 @@ examples: ti,dsp-config-name = "mono_pbtl_48khz"; }; }; - -additionalProperties: true +... diff --git a/Documentation/devicetree/bindings/sound/tfa9879.txt b/Documentation/devicetree/bindings/sound/tfa9879.txt deleted file mode 100644 index 1620e68484..0000000000 --- a/Documentation/devicetree/bindings/sound/tfa9879.txt +++ /dev/null @@ -1,23 +0,0 @@ -NXP TFA9879 class-D audio amplifier - -Required properties: - -- compatible : "nxp,tfa9879" - -- reg : the I2C address of the device - -- #sound-dai-cells : must be 0. 
- -Example: - -&i2c1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c1>; - - amp: amp@6c { - #sound-dai-cells = <0>; - compatible = "nxp,tfa9879"; - reg = <0x6c>; - }; -}; - diff --git a/Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml b/Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml index b6a4360ab8..0b4f003989 100644 --- a/Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml +++ b/Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml @@ -60,6 +60,7 @@ properties: ports: $ref: audio-graph-port.yaml#/definitions/port-base + unevaluatedProperties: false properties: port@0: $ref: audio-graph-port.yaml# diff --git a/Documentation/devicetree/bindings/sound/wm8782.txt b/Documentation/devicetree/bindings/sound/wm8782.txt index 256cdec6ec..1a28f32809 100644 --- a/Documentation/devicetree/bindings/sound/wm8782.txt +++ b/Documentation/devicetree/bindings/sound/wm8782.txt @@ -8,10 +8,17 @@ Required properties: - Vdda-supply : phandle to a regulator for the analog power supply (2.7V - 5.5V) - Vdd-supply : phandle to a regulator for the digital power supply (2.7V - 3.6V) +Optional properties: + + - wlf,fsampen: + FSAMPEN pin value, 0 for low, 1 for high, 2 for disconnected. + Defaults to 0 if left unspecified. + Example: wm8782: stereo-adc { compatible = "wlf,wm8782"; Vdda-supply = <&vdda_supply>; Vdd-supply = <&vdd_supply>; + wlf,fsampen = <2>; /* 192KHz */ }; diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml index 7d60a96549..3591c8c49b 100644 --- a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml +++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml @@ -209,17 +209,6 @@ properties: label: maxItems: 1 -patternProperties: - "^.*@[0-9a-f],[0-9a-f]$": - type: object - additionalProperties: true - description: - Child nodes for a standalone audio codec or speaker amplifier IC. 
- It has RX and TX Soundwire secondary devices. - properties: - compatible: - pattern: "^sdw[0-9a-f]{1}[0-9a-f]{4}[0-9a-f]{4}[0-9a-f]{2}$" - required: - compatible - reg @@ -240,7 +229,10 @@ oneOf: - required: - qcom,ports-sinterval -additionalProperties: false +allOf: + - $ref: soundwire-controller.yaml# + +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml index a6f34bdd1d..e1ab3f523a 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml @@ -46,6 +46,8 @@ properties: patternProperties: "^.*@[0-9a-f]+": type: object + additionalProperties: true + properties: reg: items: diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index 28b8ace630..3b47b68b92 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml @@ -68,6 +68,8 @@ properties: patternProperties: "^.*@[0-9a-f]+": type: object + additionalProperties: true + properties: reg: items: diff --git a/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml new file mode 100644 index 0000000000..bb8b6863b1 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/arm,pl022-peripheral-props.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/arm,pl022-peripheral-props.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Peripheral-specific properties for Arm PL022 SPI controller + +maintainers: + - Linus Walleij + +select: false + +properties: + pl022,interface: + description: SPI interface type + $ref: 
/schemas/types.yaml#/definitions/uint32 + enum: + - 0 # SPI + - 1 # Texas Instruments Synchronous Serial Frame Format + - 2 # Microwire (Half Duplex) + + pl022,com-mode: + description: Specifies the transfer mode + $ref: /schemas/types.yaml#/definitions/uint32 + enum: + - 0 # interrupt mode + - 1 # polling mode + - 2 # DMA mode + default: 1 + + pl022,rx-level-trig: + description: Rx FIFO watermark level + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 4 + + pl022,tx-level-trig: + description: Tx FIFO watermark level + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 4 + + pl022,ctrl-len: + description: Microwire interface - Control length + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0x03 + maximum: 0x1f + + pl022,wait-state: + description: Microwire interface - Wait state + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + + pl022,duplex: + description: Microwire interface - Full/Half duplex + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + +additionalProperties: true +... 
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml index 9ae1611175..48e97e2402 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml @@ -50,6 +50,7 @@ properties: patternProperties: "@[0-9a-f]+$": type: object + additionalProperties: true properties: spi-rx-bus-width: diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml index 93f14dd01a..88be132689 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml @@ -44,9 +44,17 @@ properties: - const: tx - const: rx + interconnects: + maxItems: 1 + interrupts: maxItems: 1 + operating-points-v2: true + + power-domains: + maxItems: 1 + reg: maxItems: 1 @@ -62,7 +70,9 @@ unevaluatedProperties: false examples: - | #include + #include #include + #include spi@7575000 { compatible = "qcom,spi-qup-v2.2.1"; @@ -76,6 +86,9 @@ examples: pinctrl-1 = <&blsp1_spi1_sleep>; dmas = <&blsp1_dma 12>, <&blsp1_dma 13>; dma-names = "tx", "rx"; + power-domains = <&rpmpd MSM8996_VDDCX>; + operating-points-v2 = <&spi_opp_table>; + interconnects = <&pnoc MASTER_BLSP_1 &bimc SLAVE_EBI_CH0>; #address-cells = <1>; #size-cells = <0>; }; diff --git a/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml b/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml index e59183e536..bed829837d 100644 --- a/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml +++ b/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml @@ -39,6 +39,12 @@ properties: power-domains: maxItems: 1 + renesas,csi-no-ss: + type: boolean + description: + The CSI Slave Selection (SS) pin won't be used to enable transmission and + reception. Only available when in target mode. 
+ required: - compatible - reg @@ -50,6 +56,9 @@ required: - '#address-cells' - '#size-cells' +dependencies: + renesas,csi-no-ss: [ spi-slave ] + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml b/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml index 339fb39529..ac1503de04 100644 --- a/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml +++ b/Documentation/devicetree/bindings/spi/rockchip-sfc.yaml @@ -47,6 +47,8 @@ properties: patternProperties: "^flash@[0-3]$": type: object + additionalProperties: true + properties: reg: minimum: 0 diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index a47cb144b0..6348a387a2 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -160,6 +160,8 @@ properties: patternProperties: "^.*@[0-9a-f]+$": type: object + additionalProperties: true + properties: reg: minimum: 0 diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index 782a014b63..15938f81fd 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -113,8 +113,14 @@ properties: minItems: 2 maxItems: 4 + st,spi-midi-ns: + description: | + Only for STM32H7, (Master Inter-Data Idleness) minimum time + delay in nanoseconds inserted between two consecutive data frames. + # The controller specific properties go here. 
allOf: + - $ref: arm,pl022-peripheral-props.yaml# - $ref: cdns,qspi-nor-peripheral-props.yaml# - $ref: samsung,spi-peripheral-props.yaml# - $ref: nvidia,tegra210-quad-peripheral-props.yaml# diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml index 5e5a704a76..7f174b7d0a 100644 --- a/Documentation/devicetree/bindings/spi/spi-pl022.yaml +++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml @@ -74,57 +74,6 @@ properties: resets: maxItems: 1 -patternProperties: - "^[a-zA-Z][a-zA-Z0-9,+\\-._]{0,63}@[0-9a-f]+$": - type: object - # SPI slave nodes must be children of the SPI master node and can - # contain the following properties. - properties: - pl022,interface: - description: SPI interface type - $ref: /schemas/types.yaml#/definitions/uint32 - enum: - - 0 # SPI - - 1 # Texas Instruments Synchronous Serial Frame Format - - 2 # Microwire (Half Duplex) - - pl022,com-mode: - description: Specifies the transfer mode - $ref: /schemas/types.yaml#/definitions/uint32 - enum: - - 0 # interrupt mode - - 1 # polling mode - - 2 # DMA mode - default: 1 - - pl022,rx-level-trig: - description: Rx FIFO watermark level - $ref: /schemas/types.yaml#/definitions/uint32 - minimum: 0 - maximum: 4 - - pl022,tx-level-trig: - description: Tx FIFO watermark level - $ref: /schemas/types.yaml#/definitions/uint32 - minimum: 0 - maximum: 4 - - pl022,ctrl-len: - description: Microwire interface - Control length - $ref: /schemas/types.yaml#/definitions/uint32 - minimum: 0x03 - maximum: 0x1f - - pl022,wait-state: - description: Microwire interface - Wait state - $ref: /schemas/types.yaml#/definitions/uint32 - enum: [0, 1] - - pl022,duplex: - description: Microwire interface - Full/Half duplex - $ref: /schemas/types.yaml#/definitions/uint32 - enum: [0, 1] - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml index 
9ca1a843c8..ae0f082bd3 100644 --- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml +++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml @@ -18,15 +18,6 @@ maintainers: allOf: - $ref: spi-controller.yaml# - - if: - properties: - compatible: - contains: - const: st,stm32f4-spi - - then: - properties: - st,spi-midi-ns: false properties: compatible: @@ -59,17 +50,6 @@ properties: - const: rx - const: tx -patternProperties: - "^[a-zA-Z][a-zA-Z0-9,+\\-._]{0,63}@[0-9a-f]+$": - type: object - # SPI slave nodes must be children of the SPI master node and can - # contain the following properties. - properties: - st,spi-midi-ns: - description: | - Only for STM32H7, (Master Inter-Data Idleness) minimum time - delay in nanoseconds inserted between two consecutive data frames. - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml index 3721c8c8ec..e02d04d4f7 100644 --- a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml @@ -18,7 +18,9 @@ allOf: properties: compatible: items: - - const: fsl,imx8qxp-sc-thermal + - enum: + - fsl,imx8dxl-sc-thermal + - fsl,imx8qxp-sc-thermal - const: fsl,imx-sc-thermal '#thermal-sensor-cells': diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml index 3aecea7786..808d987bd8 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml @@ -60,6 +60,9 @@ properties: clocks: maxItems: 1 + "#thermal-sensor-cells": + const: 0 + required: - compatible - interrupts @@ -67,6 +70,9 @@ required: - nvmem-cells - nvmem-cell-names +allOf: + - $ref: thermal-sensor.yaml# + additionalProperties: false examples: @@ -104,5 +110,6 @@ examples: nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>; 
nvmem-cell-names = "calib", "temp_grade"; clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>; + #thermal-sensor-cells = <0>; }; }; diff --git a/Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml b/Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml index fe9ae4c425..e6665af52e 100644 --- a/Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml @@ -18,6 +18,7 @@ description: | properties: compatible: enum: + - mediatek,mt7988-lvts-ap - mediatek,mt8192-lvts-ap - mediatek,mt8192-lvts-mcu - mediatek,mt8195-lvts-ap diff --git a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml index 04a2ba1aa9..b0237d2360 100644 --- a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml +++ b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml @@ -68,7 +68,12 @@ properties: patternProperties: "^(light|heavy|oc1)$": type: object + additionalProperties: false + properties: + "#cooling-cells": + const: 2 + nvidia,priority: $ref: /schemas/types.yaml#/definitions/uint32 minimum: 1 diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml index 27e9e16e64..437b747328 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml @@ -51,6 +51,7 @@ properties: - qcom,msm8996-tsens - qcom,msm8998-tsens - qcom,qcm2290-tsens + - qcom,sa8775p-tsens - qcom,sc7180-tsens - qcom,sc7280-tsens - qcom,sc8180x-tsens diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 4f3acdc4de..4a8dabc481 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ 
-8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/base.yaml# title: Thermal zone maintainers: - - Amit Kucheria + - Daniel Lezcano description: | Thermal management is achieved in devicetree by describing the sensor hardware diff --git a/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml b/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml new file mode 100644 index 0000000000..e463e11e25 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/cirrus,ep9301-timer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic EP93xx timer + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-timer + - items: + - enum: + - cirrus,ep9302-timer + - cirrus,ep9307-timer + - cirrus,ep9312-timer + - cirrus,ep9315-timer + - const: cirrus,ep9301-timer + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + timer@80810000 { + compatible = "cirrus,ep9301-timer"; + reg = <0x80810000 0x100>; + interrupt-parent = <&vic1>; + interrupts = <19>; + }; +... 
diff --git a/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml b/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml index dbe1267af0..e2607377cb 100644 --- a/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml +++ b/Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml @@ -36,7 +36,9 @@ properties: - fsl,imxrt1170-gpt - const: fsl,imx6dl-gpt - items: - - const: fsl,imx6ul-gpt + - enum: + - fsl,imx6ul-gpt + - fsl,imx7d-gpt - const: fsl,imx6sx-gpt reg: @@ -46,14 +48,18 @@ properties: maxItems: 1 clocks: + minItems: 2 items: - description: SoC GPT ipg clock - description: SoC GPT per clock + - description: SoC GPT osc per clock clock-names: + minItems: 2 items: - const: ipg - const: per + - const: osc_per required: - compatible @@ -62,6 +68,29 @@ required: - clocks - clock-names +allOf: + - if: + properties: + compatible: + contains: + enum: + - fsl,imx6dl-gpt + - fsl,imx6q-gpt + then: + properties: + clocks: + minItems: 2 + maxItems: 3 + clock-names: + minItems: 2 + maxItems: 3 + else: + properties: + clocks: + maxItems: 2 + clock-names: + maxItems: 2 + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml index fbac40b958..3931054b42 100644 --- a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml @@ -11,8 +11,8 @@ maintainers: description: | This hardware block consists of eight 16-bit timer channels and one - 32- bit timer channel. It supports the following specifications: - - Pulse input/output: 28 lines max. + 32-bit timer channel. 
It supports the following specifications: + - Pulse input/output: 28 lines max - Pulse input 3 lines - Count clock 11 clocks for each channel (14 clocks for MTU0, 12 clocks for MTU2, and 10 clocks for MTU5, four clocks for MTU1-MTU2 combination @@ -23,11 +23,11 @@ description: | - Input capture function (noise filter setting available) - Counter-clearing operation - Simultaneous writing to multiple timer counters (TCNT) - (excluding MTU8). + (excluding MTU8) - Simultaneous clearing on compare match or input capture - (excluding MTU8). + (excluding MTU8) - Simultaneous input and output to registers in synchronization with - counter operations (excluding MTU8). + counter operations (excluding MTU8) - Up to 12-phase PWM output in combination with synchronous operation (excluding MTU8) - [MTU0 MTU3, MTU4, MTU6, MTU7, and MTU8] @@ -40,26 +40,26 @@ description: | - [MTU3, MTU4, MTU6, and MTU7] - Through interlocked operation of MTU3/4 and MTU6/7, the positive and negative signals in six phases (12 phases in total) can be output in - complementary PWM and reset-synchronized PWM operation. + complementary PWM and reset-synchronized PWM operation - In complementary PWM mode, values can be transferred from buffer registers to temporary registers at crests and troughs of the timer- counter values or when the buffer registers (TGRD registers in MTU4 - and MTU7) are written to. - - Double-buffering selectable in complementary PWM mode. + and MTU7) are written to + - Double-buffering selectable in complementary PWM mode - [MTU3 and MTU4] - Through interlocking with MTU0, a mode for driving AC synchronous motors (brushless DC motors) by using complementary PWM output and reset-synchronized PWM output is settable and allows the selection - of two types of waveform output (chopping or level). + of two types of waveform output (chopping or level) - [MTU5] - - Capable of operation as a dead-time compensation counter. 
+ - Capable of operation as a dead-time compensation counter - [MTU0/MTU5, MTU1, MTU2, and MTU8] - 32-bit phase counting mode specifiable by combining MTU1 and MTU2 and - through interlocked operation with MTU0/MTU5 and MTU8. + through interlocked operation with MTU0/MTU5 and MTU8 - Interrupt-skipping function - In complementary PWM mode, interrupts on crests and troughs of counter values and triggers to start conversion by the A/D converter can be - skipped. + skipped - Interrupt sources: 43 sources. - Buffer operation: - Automatic transfer of register data (transfer from the buffer @@ -68,9 +68,9 @@ description: | - A/D converter start triggers can be generated - A/D converter start request delaying function enables A/D converter to be started with any desired timing and to be synchronized with - PWM output. + PWM output - Low power consumption function - - The MTU3a can be placed in the module-stop state. + - The MTU3a can be placed in the module-stop state There are two phase counting modes. 
16-bit phase counting mode in which MTU1 and MTU2 operate independently, and cascade connection 32-bit phase @@ -109,6 +109,7 @@ properties: compatible: items: - enum: + - renesas,r9a07g043-mtu3 # RZ/{G2UL,Five} - renesas,r9a07g044-mtu3 # RZ/G2{L,LC} - renesas,r9a07g054-mtu3 # RZ/V2L - const: renesas,rz-mtu3 diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml index a0185e15a4..e8be6c4703 100644 --- a/Documentation/devicetree/bindings/timer/sifive,clint.yaml +++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml @@ -37,6 +37,7 @@ properties: - items: - enum: - allwinner,sun20i-d1-clint + - sophgo,cv1800b-clint - thead,th1520-clint - const: thead,c900-clint - items: diff --git a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml new file mode 100644 index 0000000000..2e92bcdeb4 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/thead,c900-aclint-mtimer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo CLINT Timer + +maintainers: + - Inochi Amaoto + +properties: + compatible: + items: + - enum: + - sophgo,sg2042-aclint-mtimer + - const: thead,c900-aclint-mtimer + + reg: + items: + - description: MTIMECMP Registers + + reg-names: + items: + - const: mtimecmp + + interrupts-extended: + minItems: 1 + maxItems: 4095 + +additionalProperties: false + +required: + - compatible + - reg + - reg-names + - interrupts-extended + +examples: + - | + timer@ac000000 { + compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; + interrupts-extended = <&cpu1intc 7>, + <&cpu2intc 7>, + <&cpu3intc 7>, + <&cpu4intc 7>; + reg = <0xac000000 0x00010000>; + reg-names = "mtimecmp"; + }; +... 
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 430a814f64..c3190f2a16 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -151,8 +151,6 @@ properties: - infineon,slb9645tt # Infineon SLB9673 I2C TPM 2.0 - infineon,slb9673 - # Infineon TDA38640 Voltage Regulator - - infineon,tda38640 # Infineon TLV493D-A1B6 I2C 3D Magnetic Sensor - infineon,tlv493d-a1b6 # Infineon Multi-phase Digital VR Controller xdpe11280 @@ -311,8 +309,6 @@ properties: - nuvoton,w83773g # OKI ML86V7667 video decoder - oki,ml86v7667 - # OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus - - ovti,ov5642 # 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch - plx,pex8648 # Pulsedlight LIDAR range-finding sensor @@ -348,6 +344,8 @@ properties: # Silicon Labs SI3210 Programmable CMOS SLIC/CODEC with SPI interface - silabs,si3210 # Relative Humidity and Temperature Sensors + - silabs,si7005 + # Relative Humidity and Temperature Sensors - silabs,si7020 # Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply - skyworks,sky81452 diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml index 462ead5a1c..2cf3d016db 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml @@ -36,6 +36,7 @@ properties: - qcom,sm8350-ufshc - qcom,sm8450-ufshc - qcom,sm8550-ufshc + - qcom,sm8650-ufshc - const: qcom,ufshc - const: jedec,ufs-2.0 @@ -122,6 +123,7 @@ allOf: - qcom,sm8350-ufshc - qcom,sm8450-ufshc - qcom,sm8550-ufshc + - qcom,sm8650-ufshc then: properties: clocks: diff --git a/Documentation/devicetree/bindings/ufs/ufs-common.yaml b/Documentation/devicetree/bindings/ufs/ufs-common.yaml index bbaee4f5f7..985ea8f64d 100644 --- 
a/Documentation/devicetree/bindings/ufs/ufs-common.yaml +++ b/Documentation/devicetree/bindings/ufs/ufs-common.yaml @@ -20,11 +20,25 @@ properties: items: - description: Minimum frequency for given clock in Hz - description: Maximum frequency for given clock in Hz + deprecated: true description: | + Preferred is operating-points-v2. + Array of operating frequencies in Hz stored in the same order - as the clocks property. If this property is not defined or a value in the - array is "0" then it is assumed that the frequency is set by the parent - clock or a fixed rate clock source. + as the clocks property. If either this property or operating-points-v2 is + not defined or a value in the array is "0" then it is assumed that the + frequency is set by the parent clock or a fixed rate clock source. + + operating-points-v2: + description: + Preferred over freq-table-hz. + If present, each OPP must contain array of frequencies stored in the same + order for each clock. If clock frequency in the array is "0" then it is + assumed that the frequency is set by the parent clock or a fixed rate + clock source. 
+ + opp-table: + type: object interrupts: maxItems: 1 @@ -75,8 +89,23 @@ properties: dependencies: freq-table-hz: [ clocks ] + operating-points-v2: [ clocks, clock-names ] required: - interrupts +allOf: + - if: + required: + - freq-table-hz + then: + properties: + operating-points-v2: false + - if: + required: + - operating-points-v2 + then: + properties: + freq-table-hz: false + additionalProperties: true diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml index 1394557517..b7e664f739 100644 --- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml +++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml @@ -15,7 +15,9 @@ properties: oneOf: - enum: - chipidea,usb2 + - fsl,imx27-usb - lsi,zevio-usb + - nuvoton,npcm750-udc - nvidia,tegra20-ehci - nvidia,tegra20-udc - nvidia,tegra30-ehci @@ -66,6 +68,10 @@ properties: - items: - const: xlnx,zynq-usb-2.20a - const: chipidea,usb2 + - items: + - enum: + - nuvoton,npcm845-udc + - const: nuvoton,npcm750-udc reg: minItems: 1 @@ -388,6 +394,7 @@ allOf: enum: - chipidea,usb2 - lsi,zevio-usb + - nuvoton,npcm750-udc - nvidia,tegra20-udc - nvidia,tegra30-udc - nvidia,tegra114-udc diff --git a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml index f6e7a5c1ff..f9410eb76a 100644 --- a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml +++ b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml @@ -11,8 +11,12 @@ maintainers: properties: compatible: - enum: - - fcs,fsa4480 + oneOf: + - const: fcs,fsa4480 + - items: + - enum: + - ocs,ocp96011 + - const: fcs,fsa4480 reg: maxItems: 1 @@ -32,10 +36,43 @@ properties: type: boolean port: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base description: A port node to link the FSA4480 to a TypeC controller for the purpose of handling altmode muxing and orientation switching. 
+ unevaluatedProperties: false + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: + Specifies how the AUX+/- lines are connected to SBU1/2. + oneOf: + - items: + - const: 0 + - const: 1 + description: | + Default AUX/SBU layout (FSA4480) + - AUX+ connected to SBU2 + - AUX- connected to SBU1 + Default AUX/SBU layout (OCP96011) + - AUX+ connected to SBU1 + - AUX- connected to SBU2 + - items: + - const: 1 + - const: 0 + description: | + Swapped AUX/SBU layout (FSA4480) + - AUX+ connected to SBU1 + - AUX- connected to SBU2 + Swapped AUX/SBU layout (OCP96011) + - AUX+ connected to SBU2 + - AUX- connected to SBU1 required: - compatible diff --git a/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml index d0927f6768..ee08b9c372 100644 --- a/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml +++ b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/usb/genesys,gl850g.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Genesys Logic GL850G USB 2.0 hub controller +title: Genesys Logic USB hub controller maintainers: - Icenowy Zheng @@ -18,6 +18,7 @@ properties: - usb5e3,608 - usb5e3,610 - usb5e3,620 + - usb5e3,626 reg: true diff --git a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml index f196beb826..d3b2b666ec 100644 --- a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml +++ b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml @@ -19,6 +19,7 @@ properties: compatible: items: - enum: + - nxp,cbdtu02043 - onnn,fsusb43l10x - pericom,pi3usb102 - const: gpio-sbu-mux @@ -50,7 +51,6 @@ required: - compatible - enable-gpios - select-gpios - - mode-switch - orientation-switch - port diff --git 
a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml new file mode 100644 index 0000000000..eee548ac1a --- /dev/null +++ b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/nxp,ptn36502.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP PTN36502 Type-C USB 3.1 Gen 1 and DisplayPort v1.2 combo redriver + +maintainers: + - Luca Weiss + +properties: + compatible: + enum: + - nxp,ptn36502 + + reg: + maxItems: 1 + + vdd18-supply: + description: Power supply for VDD18 pin + + retimer-switch: + description: Flag the port as possible handle of SuperSpeed signals retiming + type: boolean + + orientation-switch: + description: Flag the port as possible handler of orientation switching + type: boolean + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: Super Speed (SS) Output endpoint to the Type-C connector + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: Super Speed (SS) Input endpoint from the Super-Speed PHY + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: + Sideband Use (SBU) AUX lines endpoint to the Type-C connector for the purpose of + handling altmode muxing and orientation switching. 
+ +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + typec-mux@1a { + compatible = "nxp,ptn36502"; + reg = <0x1a>; + + vdd18-supply = <&usb_redrive_1v8>; + + retimer-switch; + orientation-switch; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + usb_con_ss: endpoint { + remote-endpoint = <&typec_con_ss>; + }; + }; + port@1 { + reg = <1>; + phy_con_ss: endpoint { + remote-endpoint = <&usb_phy_ss>; + }; + }; + port@2 { + reg = <2>; + usb_con_sbu: endpoint { + remote-endpoint = <&typec_dp_aux>; + }; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index 67591057f2..915c820562 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -14,6 +14,7 @@ properties: items: - enum: - qcom,ipq4019-dwc3 + - qcom,ipq5018-dwc3 - qcom,ipq5332-dwc3 - qcom,ipq6018-dwc3 - qcom,ipq8064-dwc3 @@ -34,6 +35,7 @@ properties: - qcom,sdm845-dwc3 - qcom,sdx55-dwc3 - qcom,sdx65-dwc3 + - qcom,sdx75-dwc3 - qcom,sm4250-dwc3 - qcom,sm6115-dwc3 - qcom,sm6125-dwc3 @@ -180,6 +182,8 @@ allOf: - qcom,sdm670-dwc3 - qcom,sdm845-dwc3 - qcom,sdx55-dwc3 + - qcom,sdx65-dwc3 + - qcom,sdx75-dwc3 - qcom,sm6350-dwc3 then: properties: @@ -238,6 +242,7 @@ allOf: compatible: contains: enum: + - qcom,ipq5018-dwc3 - qcom,ipq5332-dwc3 - qcom,msm8994-dwc3 - qcom,qcs404-dwc3 @@ -363,6 +368,7 @@ allOf: - qcom,sdm845-dwc3 - qcom,sdx55-dwc3 - qcom,sdx65-dwc3 + - qcom,sdx75-dwc3 - qcom,sm4250-dwc3 - qcom,sm6125-dwc3 - qcom,sm6350-dwc3 @@ -411,6 +417,7 @@ allOf: compatible: contains: enum: + - qcom,ipq5018-dwc3 - qcom,ipq5332-dwc3 - qcom,sdm660-dwc3 then: @@ -514,8 +521,8 @@ examples: interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; diff --git 
a/Documentation/devicetree/bindings/usb/realtek,rtd-dwc3.yaml b/Documentation/devicetree/bindings/usb/realtek,rtd-dwc3.yaml new file mode 100644 index 0000000000..345d0132d4 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/realtek,rtd-dwc3.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2023 Realtek Semiconductor Corporation +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/realtek,rtd-dwc3.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek DWC3 USB SoC Controller Glue + +maintainers: + - Stanley Chang + +description: + The Realtek DHC SoC embeds a DWC3 USB IP Core configured for USB 2.0 + and USB 3.0 in host or dual-role mode. + +properties: + compatible: + items: + - enum: + - realtek,rtd1295-dwc3 + - realtek,rtd1315e-dwc3 + - realtek,rtd1319-dwc3 + - realtek,rtd1319d-dwc3 + - realtek,rtd1395-dwc3 + - realtek,rtd1619-dwc3 + - realtek,rtd1619b-dwc3 + - const: realtek,rtd-dwc3 + + reg: + items: + - description: Address and length of register set for wrapper of dwc3 core. + - description: Address and length of register set for pm control. 
+ + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + + ranges: true + +patternProperties: + "^usb@[0-9a-f]+$": + $ref: snps,dwc3.yaml# + description: Required child node + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + - ranges + +additionalProperties: false + +examples: + - | + usb@98013e00 { + compatible = "realtek,rtd1319d-dwc3", "realtek,rtd-dwc3"; + reg = <0x98013e00 0x140>, <0x98013f60 0x4>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + usb@98050000 { + compatible = "snps,dwc3"; + reg = <0x98050000 0x9000>; + interrupts = <0 94 4>; + phys = <&usb2phy &usb3phy>; + phy-names = "usb2-phy", "usb3-phy"; + dr_mode = "otg"; + usb-role-switch; + role-switch-default-mode = "host"; + snps,dis_u2_susphy_quirk; + snps,parkmode-disable-ss-quirk; + snps,parkmode-disable-hs-quirk; + maximum-speed = "high-speed"; + }; + }; diff --git a/Documentation/devicetree/bindings/usb/realtek,rtd-type-c.yaml b/Documentation/devicetree/bindings/usb/realtek,rtd-type-c.yaml new file mode 100644 index 0000000000..6142b0b5b5 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/realtek,rtd-type-c.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2023 Realtek Semiconductor Corporation +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/realtek,rtd-type-c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek DHC RTD SoCs USB Type-C Connector detection + +maintainers: + - Stanley Chang + +description: + Realtek digital home center (DHC) RTD series SoCs include a type c module. + This module is able to detect the state of type c connector. 
+ +properties: + compatible: + enum: + - realtek,rtd1295-type-c + - realtek,rtd1312c-type-c + - realtek,rtd1315e-type-c + - realtek,rtd1319-type-c + - realtek,rtd1319d-type-c + - realtek,rtd1395-type-c + - realtek,rtd1619-type-c + - realtek,rtd1619b-type-c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + nvmem-cell-names: + items: + - const: usb-cal + + nvmem-cells: + maxItems: 1 + description: + The phandle to nvmem cell that contains the trimming data. + The type c parameter trimming data specified via efuse. + If unspecified, default value is used. + + realtek,rd-ctrl-gpios: + description: The gpio node to control external Rd on board. + maxItems: 1 + + connector: + $ref: /schemas/connector/usb-connector.yaml# + description: Properties for usb c connector. + type: object + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + #include + + type-c@7220 { + compatible = "realtek,rtd1619b-type-c"; + reg = <0x7220 0x20>; + interrupts = <0 60 IRQ_TYPE_LEVEL_HIGH>; + + pinctrl-names = "default"; + pinctrl-0 = <&usb_cc1_pins>, <&usb_cc2_pins>; + nvmem-cells = <&otp_usb_cal>; + nvmem-cell-names = "usb-cal"; + + connector { + compatible = "usb-c-connector"; + label = "USB-C"; + data-role = "dual"; + power-role = "dual"; + }; + }; diff --git a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml index 291844c8f3..c4924113f9 100644 --- a/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml @@ -15,14 +15,11 @@ description: Phy documentation is provided in the following places. 
USB2.0 PHY - Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml + Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml Type-C PHY Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt -allOf: - - $ref: snps,dwc3.yaml# - select: properties: compatible: @@ -30,6 +27,7 @@ select: enum: - rockchip,rk3328-dwc3 - rockchip,rk3568-dwc3 + - rockchip,rk3588-dwc3 required: - compatible @@ -39,6 +37,7 @@ properties: - enum: - rockchip,rk3328-dwc3 - rockchip,rk3568-dwc3 + - rockchip,rk3588-dwc3 - const: snps,dwc3 reg: @@ -58,7 +57,9 @@ properties: Master/Core clock, must to be >= 62.5 MHz for SS operation and >= 30MHz for HS operation - description: - Controller grf clock + Controller grf clock OR UTMI clock + - description: + PIPE clock clock-names: minItems: 3 @@ -66,7 +67,10 @@ properties: - const: ref_clk - const: suspend_clk - const: bus_clk - - const: grf_clk + - enum: + - grf_clk + - utmi + - const: pipe power-domains: maxItems: 1 @@ -86,6 +90,52 @@ required: - clocks - clock-names +allOf: + - $ref: snps,dwc3.yaml# + - if: + properties: + compatible: + contains: + const: rockchip,rk3328-dwc3 + then: + properties: + clocks: + minItems: 3 + maxItems: 4 + clock-names: + minItems: 3 + items: + - const: ref_clk + - const: suspend_clk + - const: bus_clk + - const: grf_clk + - if: + properties: + compatible: + contains: + const: rockchip,rk3568-dwc3 + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + - if: + properties: + compatible: + contains: + const: rockchip,rk3588-dwc3 + then: + properties: + clock-names: + minItems: 3 + items: + - const: ref_clk + - const: suspend_clk + - const: bus_clk + - const: utmi + - const: pipe + examples: - | #include diff --git a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml index a696f23730..ee5af4b381 100644 --- a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml 
@@ -310,6 +310,62 @@ properties: maximum: 62 deprecated: true + snps,rx-thr-num-pkt: + description: + USB RX packet threshold count. In host mode, this field specifies + the space that must be available in the RX FIFO before the core can + start the corresponding USB RX transaction (burst). + In device mode, this field specifies the space that must be + available in the RX FIFO before the core can send ERDY for a + flow-controlled endpoint. It is only used for SuperSpeed. + The valid values for this field are from 1 to 15. (DWC3 SuperSpeed + USB 3.0 Controller Databook) + $ref: /schemas/types.yaml#/definitions/uint8 + minimum: 1 + maximum: 15 + + snps,rx-max-burst: + description: + Max USB RX burst size. In host mode, this field specifies the + Maximum Bulk IN burst the DWC_usb3 core can perform. When the system + bus is slower than the USB, RX FIFO can overrun during a long burst. + You can program a smaller value to this field to limit the RX burst + size that the core can perform. It only applies to SS Bulk, + Isochronous, and Interrupt IN endpoints in the host mode. + In device mode, this field specifies the NUMP value that is sent in + ERDY for an OUT endpoint. + The valid values for this field are from 1 to 16. (DWC3 SuperSpeed + USB 3.0 Controller Databook) + $ref: /schemas/types.yaml#/definitions/uint8 + minimum: 1 + maximum: 16 + + snps,tx-thr-num-pkt: + description: + USB TX packet threshold count. This field specifies the number of + packets that must be in the TXFIFO before the core can start + transmission for the corresponding USB transaction (burst). + This count is valid in both host and device modes. It is only used + for SuperSpeed operation. + Valid values are from 1 to 15. (DWC3 SuperSpeed USB 3.0 Controller + Databook) + $ref: /schemas/types.yaml#/definitions/uint8 + minimum: 1 + maximum: 15 + + snps,tx-max-burst: + description: + Max USB TX burst size. When the system bus is slower than the USB, + TX FIFO can underrun during a long burst. 
Program a smaller value + to this field to limit the TX burst size that the core can execute. + In Host mode, it only applies to SS Bulk, Isochronous, and Interrupt + OUT endpoints. This value is not used in device mode. + Valid values are from 1 to 16. (DWC3 SuperSpeed USB 3.0 Controller + Databook) + $ref: /schemas/types.yaml#/definitions/uint8 + minimum: 1 + maximum: 16 + snps,rx-thr-num-pkt-prd: description: Periodic ESS RX packet threshold count (host mode only). Set this and diff --git a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml index 5497a60cdd..323d664ae0 100644 --- a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml +++ b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml @@ -20,8 +20,23 @@ properties: enum: - ti,tps6598x - apple,cd321x + - ti,tps25750 + reg: - maxItems: 1 + minItems: 1 + items: + - description: main PD controller address + - description: | + I2C slave address field in PBMs input data + which is used as the device address when writing the + patch for TPS25750. + The patch address can be any value except 0x00, 0x20, + 0x21, 0x22, and 0x23 + + reg-names: + items: + - const: main + - const: patch-address wakeup-source: true @@ -32,11 +47,46 @@ properties: items: - const: irq + connector: + $ref: /schemas/connector/usb-connector.yaml# + + firmware-name: + description: | + Should contain the name of the default patch binary + file located on the firmware search path which is + used to switch the controller into APP mode. + This is used when tps25750 doesn't have an EEPROM + connected to it. 
+ maxItems: 1 + required: - compatible - reg -additionalProperties: true +allOf: + - if: + properties: + compatible: + contains: + const: ti,tps25750 + then: + properties: + reg: + maxItems: 2 + + connector: + required: + - data-role + + required: + - connector + - reg-names + else: + properties: + reg: + maxItems: 1 + +additionalProperties: false examples: - | @@ -68,4 +118,36 @@ examples: }; }; }; + + - | + #include + i2c { + #address-cells = <1>; + #size-cells = <0>; + + typec@21 { + compatible = "ti,tps25750"; + reg = <0x21>, <0x0f>; + reg-names = "main", "patch-address"; + + interrupt-parent = <&msmgpio>; + interrupts = <100 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "irq"; + firmware-name = "tps25750.bin"; + + pinctrl-names = "default"; + pinctrl-0 = <&typec_pins>; + + typec_con0: connector { + compatible = "usb-c-connector"; + label = "USB-C"; + data-role = "dual"; + port { + typec_ep0: endpoint { + remote-endpoint = <&otg_ep>; + }; + }; + }; + }; + }; ... diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml index 692dd60e3f..45a19d4928 100644 --- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml +++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml @@ -41,7 +41,7 @@ examples: - | usb { phys = <&usb2_phy1>, <&usb3_phy1>; - phy-names = "usb"; + phy-names = "usb2", "usb3"; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/usb/vialab,vl817.yaml b/Documentation/devicetree/bindings/usb/vialab,vl817.yaml index 76db9071b3..c815010ba9 100644 --- a/Documentation/devicetree/bindings/usb/vialab,vl817.yaml +++ b/Documentation/devicetree/bindings/usb/vialab,vl817.yaml @@ -37,7 +37,6 @@ properties: required: - compatible - reg - - reset-gpios - vdd-supply - peer-hub diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 573578db95..309b94c328 100644 --- 
a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -59,6 +59,8 @@ patternProperties: description: AD Holdings Plc. "^adi,.*": description: Analog Devices, Inc. + "^adieng,.*": + description: ADI Engineering, Inc. "^advantech,.*": description: Advantech Corporation "^aeroflexgaisler,.*": @@ -127,6 +129,8 @@ patternProperties: description: Arasan Chip Systems "^archermind,.*": description: ArcherMind Technology (Nanjing) Co., Ltd. + "^arcom,.*": + description: Arcom Controllers "^arctic,.*": description: Arctic Sand "^arcx,.*": @@ -194,6 +198,8 @@ patternProperties: description: Shanghai Belling Co., Ltd. "^bhf,.*": description: Beckhoff Automation GmbH & Co. KG + "^bigtreetech,.*": + description: Shenzhen BigTree Tech Co., LTD "^bitmain,.*": description: Bitmain Technologies "^blutek,.*": @@ -484,6 +490,8 @@ patternProperties: description: FocalTech Systems Co.,Ltd "^forlinx,.*": description: Baoding Forlinx Embedded Technology Co., Ltd. + "^freecom,.*": + description: Freecom Gmbh "^frida,.*": description: Shenzhen Frida LCD Co., Ltd. "^friendlyarm,.*": @@ -496,6 +504,8 @@ patternProperties: description: FX Technology Ltd. "^gardena,.*": description: GARDENA GmbH + "^gateway,.*": + description: Gateway Communications "^gateworks,.*": description: Gateworks Corporation "^gcw,.*": @@ -510,6 +520,8 @@ patternProperties: description: GE Fanuc Intelligent Platforms Embedded Systems, Inc. "^gemei,.*": description: Gemei Digital Technology Co., Ltd. + "^gemtek,.*": + description: Gemtek Technology Co., Ltd. "^genesys,.*": description: Genesys Logic, Inc. "^geniatech,.*": @@ -530,6 +542,8 @@ patternProperties: description: Shenzhen Huiding Technology Co., Ltd. "^google,.*": description: Google, Inc. + "^goramo,.*": + description: Goramo Gorecki "^gplus,.*": description: GPLUS "^grinn,.*": @@ -805,6 +819,8 @@ patternProperties: description: Mantix Display Technology Co.,Ltd. 
"^mapleboard,.*": description: Mapleboard.org + "^marantec,.*": + description: Marantec electronics GmbH "^marvell,.*": description: Marvell Technology Group Ltd. "^maxbotix,.*": @@ -863,6 +879,8 @@ patternProperties: description: MikroElektronika d.o.o. "^mikrotik,.*": description: MikroTik + "^milkv,.*": + description: MilkV Technology Co., Ltd "^miniand,.*": description: Miniand Tech "^minix,.*": @@ -871,6 +889,8 @@ patternProperties: description: MiraMEMS Sensing Technology Co., Ltd. "^mitsubishi,.*": description: Mitsubishi Electric Corporation + "^mitsumi,.*": + description: Mitsumi Electric Co., Ltd. "^mixel,.*": description: Mixel, Inc. "^miyoo,.*": @@ -1081,6 +1101,8 @@ patternProperties: description: Powertip Tech. Corp. "^powervr,.*": description: PowerVR (deprecated, use img) + "^powkiddy,.*": + description: Powkiddy "^primux,.*": description: Primux Trading, S.L. "^probox2,.*": @@ -1273,6 +1295,8 @@ patternProperties: description: Solomon Systech Limited "^sony,.*": description: Sony Corporation + "^sophgo,.*": + description: Sophgo Technology Inc. "^sourceparts,.*": description: Source Parts Inc. "^spansion,.*": @@ -1420,6 +1444,8 @@ patternProperties: description: Truly Semiconductors Limited "^tsd,.*": description: Theobroma Systems Design und Consulting GmbH + "^turing,.*": + description: Turing Machines, Inc. "^tyan,.*": description: Tyan Computer Corporation "^u-blox,.*": @@ -1444,6 +1470,8 @@ patternProperties: description: United Radiant Technology Corporation "^usi,.*": description: Universal Scientific Industrial Co., Ltd. + "^usr,.*": + description: U.S. Robotics Corporation "^utoo,.*": description: Aigo Digital Technology Co., Ltd. 
"^v3,.*": diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml index 443e2e7ab4..69845ec32e 100644 --- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml @@ -15,9 +15,15 @@ allOf: properties: compatible: - enum: - - amlogic,meson-gxbb-wdt - - amlogic,t7-wdt + oneOf: + - enum: + - amlogic,meson-gxbb-wdt + - amlogic,t7-wdt + - items: + - enum: + - amlogic,c3-wdt + - amlogic,s4-wdt + - const: amlogic,t7-wdt reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt index a8197632d6..3208adb3e5 100644 --- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt @@ -47,7 +47,15 @@ Optional properties for AST2500-compatible watchdogs: is configured as push-pull, then set the pulse polarity to active-high. The default is active-low. -Example: +Optional properties for AST2500- and AST2600-compatible watchdogs: + - aspeed,reset-mask: A bitmask indicating which peripherals will be reset if + the watchdog timer expires. On AST2500 this should be a + single word defined using the AST2500_WDT_RESET_* macros; + on AST2600 this should be a two-word array with the first + word defined using the AST2600_WDT_RESET1_* macros and the + second word defined using the AST2600_WDT_RESET2_* macros. 
+ +Examples: wdt1: watchdog@1e785000 { compatible = "aspeed,ast2400-wdt"; @@ -55,3 +63,11 @@ Example: aspeed,reset-type = "system"; aspeed,external-signal; }; + + #include + wdt2: watchdog@1e785040 { + compatible = "aspeed,ast2600-wdt"; + reg = <0x1e785040 0x40>; + aspeed,reset-mask = ; + }; diff --git a/Documentation/devicetree/bindings/watchdog/atmel,at91rm9200-wdt.yaml b/Documentation/devicetree/bindings/watchdog/atmel,at91rm9200-wdt.yaml new file mode 100644 index 0000000000..7af3571d89 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/atmel,at91rm9200-wdt.yaml @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/atmel,at91rm9200-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Atmel AT91RM9200 System Timer Watchdog + +maintainers: + - Nicolas Ferre + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + const: atmel,at91rm9200-wdt + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + watchdog@fffffd00 { + compatible = "atmel,at91rm9200-wdt"; + reg = <0xfffffd00 0x10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt deleted file mode 100644 index d4d86cf8f9..0000000000 --- a/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt +++ /dev/null @@ -1,9 +0,0 @@ -Atmel AT91RM9200 System Timer Watchdog - -Required properties: -- compatible: must be "atmel,at91sam9260-wdt". 
- -Example: - watchdog@fffffd00 { - compatible = "atmel,at91rm9200-wdt"; - }; diff --git a/Documentation/devicetree/bindings/watchdog/cnxt,cx92755-wdt.yaml b/Documentation/devicetree/bindings/watchdog/cnxt,cx92755-wdt.yaml new file mode 100644 index 0000000000..1844d7e026 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/cnxt,cx92755-wdt.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/cnxt,cx92755-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Conexant Digicolor SoCs Watchdog timer + +description: | + The watchdog functionality in Conexant Digicolor SoCs relies on the so called + "Agent Communication" block. This block includes the eight programmable system + timer counters. The first timer (called "Timer A") is the only one that can be + used as watchdog. + +allOf: + - $ref: watchdog.yaml# + +maintainers: + - Baruch Siach + +properties: + compatible: + const: cnxt,cx92755-wdt + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - clocks + +unevaluatedProperties: false + +examples: + - | + watchdog@f0000fc0 { + compatible = "cnxt,cx92755-wdt"; + reg = <0xf0000fc0 0x8>; + clocks = <&main_clk>; + timeout-sec = <15>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt deleted file mode 100644 index 354314d854..0000000000 --- a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt +++ /dev/null @@ -1,34 +0,0 @@ -* Dialog Semiconductor DA9062/61 Watchdog Timer - -Required properties: - -- compatible: should be one of the following valid compatible string lines: - "dlg,da9061-watchdog", "dlg,da9062-watchdog" - "dlg,da9062-watchdog" - -Optional properties: -- dlg,use-sw-pm: Add this property to disable the watchdog during suspend. 
- Only use this option if you can't use the watchdog automatic suspend - function during a suspend (see register CONTROL_B). -- dlg,wdt-sd: Set what happens on watchdog timeout. If this bit is set the - watchdog timeout triggers SHUTDOWN, if cleared the watchdog triggers - POWERDOWN. Can be 0 or 1. Only use this option if you want to change the - default chip's OTP setting for WATCHDOG_SD bit. If this property is NOT - set the WATCHDOG_SD bit and on timeout watchdog behavior will match the - chip's OTP settings. - -Example: DA9062 - - pmic0: da9062@58 { - watchdog { - compatible = "dlg,da9062-watchdog"; - }; - }; - -Example: DA9061 using a fall-back compatible for the DA9062 watchdog driver - - pmic0: da9061@58 { - watchdog { - compatible = "dlg,da9061-watchdog", "dlg,da9062-watchdog"; - }; - }; diff --git a/Documentation/devicetree/bindings/watchdog/digicolor-wdt.txt b/Documentation/devicetree/bindings/watchdog/digicolor-wdt.txt deleted file mode 100644 index a882967e17..0000000000 --- a/Documentation/devicetree/bindings/watchdog/digicolor-wdt.txt +++ /dev/null @@ -1,25 +0,0 @@ -Conexant Digicolor SoCs Watchdog timer - -The watchdog functionality in Conexant Digicolor SoCs relies on the so called -"Agent Communication" block. This block includes the eight programmable system -timer counters. The first timer (called "Timer A") is the only one that can be -used as watchdog. 
- -Required properties: - -- compatible : Should be "cnxt,cx92755-wdt" -- reg : Specifies base physical address and size of the registers -- clocks : phandle; specifies the clock that drives the timer - -Optional properties: - -- timeout-sec : Contains the watchdog timeout in seconds - -Example: - - watchdog@f0000fc0 { - compatible = "cnxt,cx92755-wdt"; - reg = <0xf0000fc0 0x8>; - clocks = <&main_clk>; - timeout-sec = <15>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml b/Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml new file mode 100644 index 0000000000..f058628bb6 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/dlg,da9062-watchdog.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Dialog Semiconductor DA9062/61 Watchdog Timer + +maintainers: + - Steve Twiss + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + enum: + - dlg,da9061-watchdog + - dlg,da9062-watchdog + + dlg,use-sw-pm: + type: boolean + description: + Add this property to disable the watchdog during suspend. + Only use this option if you can't use the watchdog automatic suspend + function during a suspend (see register CONTROL_B). + + dlg,wdt-sd: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + description: + Set what happens on watchdog timeout. If this bit is set the + watchdog timeout triggers SHUTDOWN, if cleared the watchdog triggers + POWERDOWN. Can be 0 or 1. Only use this option if you want to change the + default chip's OTP setting for WATCHDOG_SD bit. If this property is NOT + set the WATCHDOG_SD bit and on timeout watchdog behavior will match the + chip's OTP settings. 
+ +required: + - compatible + +unevaluatedProperties: false + +examples: + - | + watchdog { + compatible = "dlg,da9062-watchdog"; + dlg,use-sw-pm; + dlg,wdt-sd = <1>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml index 47701248cd..8b7aa92224 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml @@ -18,7 +18,9 @@ allOf: properties: compatible: items: - - const: fsl,imx8qxp-sc-wdt + - enum: + - fsl,imx8dxl-sc-wdt + - fsl,imx8qxp-sc-wdt - const: fsl,imx-sc-wdt required: diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml index 4b7ed13557..9c50766bf6 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml @@ -30,6 +30,11 @@ properties: clocks: maxItems: 1 + fsl,ext-reset-output: + description: + When set, wdog can generate external reset from the wdog_any pin. 
+ type: boolean + required: - compatible - interrupts diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml index 5046dfa55f..c12bc852ae 100644 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -21,6 +21,8 @@ properties: - qcom,apss-wdt-ipq5018 - qcom,apss-wdt-ipq5332 - qcom,apss-wdt-ipq9574 + - qcom,apss-wdt-msm8226 + - qcom,apss-wdt-msm8974 - qcom,apss-wdt-msm8994 - qcom,apss-wdt-qcm2290 - qcom,apss-wdt-qcs404 diff --git a/Documentation/devicetree/bindings/writing-schema.rst b/Documentation/devicetree/bindings/writing-schema.rst index 4a381d20f2..0a6cf19a14 100644 --- a/Documentation/devicetree/bindings/writing-schema.rst +++ b/Documentation/devicetree/bindings/writing-schema.rst @@ -159,11 +159,14 @@ It is possible to run both in a single command:: make dt_binding_check dtbs_check It is also possible to run checks with a subset of matching schema files by -setting the ``DT_SCHEMA_FILES`` variable to a specific schema file or pattern. +setting the ``DT_SCHEMA_FILES`` variable to 1 or more specific schema files or +patterns (partial match of a fixed string). Each file or pattern should be +separated by ':'. :: make dt_binding_check DT_SCHEMA_FILES=trivial-devices.yaml + make dt_binding_check DT_SCHEMA_FILES=trivial-devices.yaml:rtc.yaml make dt_binding_check DT_SCHEMA_FILES=/gpio/ make dtbs_check DT_SCHEMA_FILES=trivial-devices.yaml diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst index d4793826ad..662c7a840c 100644 --- a/Documentation/doc-guide/contributing.rst +++ b/Documentation/doc-guide/contributing.rst @@ -138,6 +138,10 @@ times, but it's highly important. If we can actually eliminate warnings from the documentation build, then we can start expecting developers to avoid adding new ones. 
+In addition to warnings from the regular documentation build, you can also +run ``make refcheckdocs`` to find references to nonexistent documentation +files. + Languishing kerneldoc comments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/driver-api/80211/mac80211.rst b/Documentation/driver-api/80211/mac80211.rst index 67d2e58b45..e38a220401 100644 --- a/Documentation/driver-api/80211/mac80211.rst +++ b/Documentation/driver-api/80211/mac80211.rst @@ -120,7 +120,7 @@ functions/definitions ieee80211_rx ieee80211_rx_ni ieee80211_rx_irqsafe - ieee80211_tx_status + ieee80211_tx_status_skb ieee80211_tx_status_ni ieee80211_tx_status_irqsafe ieee80211_rts_get diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index f92a32d095..0c153d79cc 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -5,14 +5,30 @@ The dma-buf subsystem provides the framework for sharing buffers for hardware (DMA) access across multiple device drivers and subsystems, and for synchronizing asynchronous hardware access. -This is used, for example, by drm "prime" multi-GPU support, but is of -course not limited to GPU use cases. - -The three main components of this are: (1) dma-buf, representing a -sg_table and exposed to userspace as a file descriptor to allow passing -between devices, (2) fence, which provides a mechanism to signal when -one device has finished access, and (3) reservation, which manages the -shared or exclusive fence(s) associated with the buffer. +As an example, it is used extensively by the DRM subsystem to exchange +buffers between processes, contexts, library APIs within the same +process, and also to exchange buffers with other subsystems such as +V4L2. 
+ +This document describes the way in which kernel subsystems can use and +interact with the three main primitives offered by dma-buf: + + - dma-buf, representing a sg_table and exposed to userspace as a file + descriptor to allow passing between processes, subsystems, devices, + etc; + - dma-fence, providing a mechanism to signal when an asynchronous + hardware operation has completed; and + - dma-resv, which manages a set of dma-fences for a particular dma-buf + allowing implicit (kernel-ordered) synchronization of work to + preserve the illusion of coherent access + + +Userspace API principles and use +-------------------------------- + +For more details on how to design your subsystem's API for dma-buf use, please +see Documentation/userspace-api/dma-buf-alloc-exchange.rst. + Shared DMA Buffers ------------------ diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst new file mode 100644 index 0000000000..e3d593841a --- /dev/null +++ b/Documentation/driver-api/dpll.rst @@ -0,0 +1,551 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +The Linux kernel dpll subsystem +=============================== + +DPLL +==== + +PLL - Phase Locked Loop is an electronic circuit which syntonizes clock +signal of a device with an external clock signal. Effectively enabling +device to run on the same clock signal beat as provided on a PLL input. + +DPLL - Digital Phase Locked Loop is an integrated circuit which in +addition to plain PLL behavior incorporates a digital phase detector +and may have digital divider in the loop. As a result, the frequency on +DPLL's input and output may be configurable. + +Subsystem +========= + +The main purpose of dpll subsystem is to provide general interface +to configure devices that use any kind of Digital PLL and could use +different sources of input signal to synchronize to, as well as +different types of outputs. 
+The main interface is NETLINK_GENERIC based protocol with an event +monitoring multicast group defined. + +Device object +============= + +Single dpll device object means single Digital PLL circuit and bunch of +connected pins. +It reports the supported modes of operation and current status to the +user in response to the `do` request of netlink command +``DPLL_CMD_DEVICE_GET`` and list of dplls registered in the subsystem +with `dump` netlink request of the same command. +Changing the configuration of dpll device is done with `do` request of +netlink ``DPLL_CMD_DEVICE_SET`` command. +A device handle is ``DPLL_A_ID``, it shall be provided to get or set +configuration of particular device in the system. It can be obtained +with a ``DPLL_CMD_DEVICE_GET`` `dump` request or +a ``DPLL_CMD_DEVICE_ID_GET`` `do` request, where the one must provide +attributes that result in single device match. + +Pin object +========== + +A pin is amorphic object which represents either input or output, it +could be internal component of the device, as well as externally +connected. +The number of pins per dpll vary, but usually multiple pins shall be +provided for a single dpll device. +Pin's properties, capabilities and status is provided to the user in +response to `do` request of netlink ``DPLL_CMD_PIN_GET`` command. +It is also possible to list all the pins that were registered in the +system with `dump` request of ``DPLL_CMD_PIN_GET`` command. +Configuration of a pin can be changed by `do` request of netlink +``DPLL_CMD_PIN_SET`` command. +Pin handle is a ``DPLL_A_PIN_ID``, it shall be provided to get or set +configuration of particular pin in the system. It can be obtained with +``DPLL_CMD_PIN_GET`` `dump` request or ``DPLL_CMD_PIN_ID_GET`` `do` +request, where user provides attributes that result in single pin match. 
+ +Pin selection +============= + +In general, selected pin (the one which signal is driving the dpll +device) can be obtained from ``DPLL_A_PIN_STATE`` attribute, and only +one pin shall be in ``DPLL_PIN_STATE_CONNECTED`` state for any dpll +device. + +Pin selection can be done either manually or automatically, depending +on hardware capabilities and active dpll device work mode +(``DPLL_A_MODE`` attribute). The consequence is that there are +differences for each mode in terms of available pin states, as well as +for the states the user can request for a dpll device. + +In manual mode (``DPLL_MODE_MANUAL``) the user can request or receive +one of following pin states: + +- ``DPLL_PIN_STATE_CONNECTED`` - the pin is used to drive dpll device +- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin is not used to drive dpll + device + +In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can request or +receive one of following pin states: + +- ``DPLL_PIN_STATE_SELECTABLE`` - the pin shall be considered as valid + input for automatic selection algorithm +- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin shall be not considered as + a valid input for automatic selection algorithm + +In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can only receive +pin state ``DPLL_PIN_STATE_CONNECTED`` once automatic selection +algorithm locks a dpll device with one of the inputs. + +Shared pins +=========== + +A single pin object can be attached to multiple dpll devices. +Then there are two groups of configuration knobs: + +1) Set on a pin - the configuration affects all dpll devices pin is + registered to (i.e., ``DPLL_A_PIN_FREQUENCY``), +2) Set on a pin-dpll tuple - the configuration affects only selected + dpll device (i.e., ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE``, + ``DPLL_A_PIN_DIRECTION``). + +MUX-type pins +============= + +A pin can be MUX-type, it aggregates child pins and serves as a pin +multiplexer. 
One or more pins are registered with MUX-type instead of +being directly registered to a dpll device. +Pins registered with a MUX-type pin provide user with additional nested +attribute ``DPLL_A_PIN_PARENT_PIN`` for each parent they were registered +with. +If a pin was registered with multiple parent pins, they behave like a +multiple output multiplexer. In this case output of a +``DPLL_CMD_PIN_GET`` would contain multiple pin-parent nested +attributes with current state related to each parent, like:: + + 'pin': [{{ + 'clock-id': 282574471561216, + 'module-name': 'ice', + 'capabilities': 4, + 'id': 13, + 'parent-pin': [ + {'parent-id': 2, 'state': 'connected'}, + {'parent-id': 3, 'state': 'disconnected'} + ], + 'type': 'synce-eth-port' + }}] + +Only one child pin can provide its signal to the parent MUX-type pin at +a time, the selection is done by requesting change of a child pin state +on desired parent, with the use of ``DPLL_A_PIN_PARENT`` nested +attribute. Example of netlink `set state on parent pin` message format: + + ========================== ============================================= + ``DPLL_A_PIN_ID`` child pin id + ``DPLL_A_PIN_PARENT_PIN`` nested attribute for requesting configuration + related to parent pin + ``DPLL_A_PIN_PARENT_ID`` parent pin id + ``DPLL_A_PIN_STATE`` requested pin state on parent + ========================== ============================================= + +Pin priority +============ + +Some devices might offer a capability of automatic pin selection mode +(enum value ``DPLL_MODE_AUTOMATIC`` of ``DPLL_A_MODE`` attribute). +Usually, automatic selection is performed on the hardware level, which +means only pins directly connected to the dpll can be used for automatic +input pin selection. 
+In automatic selection mode, the user cannot manually select an input +pin for the device, instead the user shall provide all directly +connected pins with a priority ``DPLL_A_PIN_PRIO``, the device would +pick a highest priority valid signal and use it to control the DPLL +device. Example of netlink `set priority on parent pin` message format: + + ============================ ============================================= + ``DPLL_A_PIN_ID`` configured pin id + ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting configuration + related to parent dpll device + ``DPLL_A_PIN_PARENT_ID`` parent dpll device id + ``DPLL_A_PIN_PRIO`` requested pin prio on parent dpll + ============================ ============================================= + +Child pin of MUX-type pin is not capable of automatic input pin selection, +in order to configure active input of a MUX-type pin, the user needs to +request desired pin state of the child pin on the parent pin, +as described in the ``MUX-type pins`` chapter. + +Phase offset measurement and adjustment +======================================== + +Device may provide ability to measure a phase difference between signals +on a pin and its parent dpll device. If pin-dpll phase offset measurement +is supported, it shall be provided with ``DPLL_A_PIN_PHASE_OFFSET`` +attribute for each parent dpll device. + +Device may also provide ability to adjust a signal phase on a pin. +If pin phase adjustment is supported, minimal and maximal values that the pin +can handle shall be provided to the user on ``DPLL_CMD_PIN_GET`` response +with ``DPLL_A_PIN_PHASE_ADJUST_MIN`` and ``DPLL_A_PIN_PHASE_ADJUST_MAX`` +attributes. Configured phase adjust value is provided with +``DPLL_A_PIN_PHASE_ADJUST`` attribute of a pin, and value change can be +requested with the same attribute with ``DPLL_CMD_PIN_SET`` command. 
+ + =============================== ====================================== + ``DPLL_A_PIN_ID`` configured pin id + ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase + adjustment on parent dpll device + ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting + configuration on given parent dpll + device + ``DPLL_A_PIN_PARENT_ID`` parent dpll device id + ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference + between a pin and parent dpll device + =============================== ====================================== + +All phase related values are provided in picoseconds, which represent +time difference between signal phases. The negative value means that +phase of signal on pin is earlier in time than dpll's signal. Positive +value means that phase of signal on pin is later in time than signal of +a dpll. + +Phase adjust (also min and max) values are integers, but measured phase +offset values are fractional with 3-digit decimal places and shall be +divided with ``DPLL_PIN_PHASE_OFFSET_DIVIDER`` to get integer part and +modulo divided to get fractional part. + +Configuration commands group +============================ + +Configuration commands are used to get information about registered +dpll devices (and pins), as well as set configuration of device or pins. +As dpll devices must be abstracted and reflect real hardware, +there is no way to add new dpll device via netlink from user space and +each device should be registered by its driver. + +All netlink commands require ``GENL_ADMIN_PERM``. This is to prevent +any spamming/DoS from unauthorized userspace applications. + +List of netlink commands with possible attributes +================================================= + +Constants identifying command types for dpll device uses a +``DPLL_CMD_`` prefix and suffix according to command purpose. 
+The dpll device related attributes use a ``DPLL_A_`` prefix and +suffix according to attribute purpose. + + ==================================== ================================= + ``DPLL_CMD_DEVICE_ID_GET`` command to get device ID + ``DPLL_A_MODULE_NAME`` attr module name of registerer + ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_TYPE`` attr type of dpll device + ==================================== ================================= + + ==================================== ================================= + ``DPLL_CMD_DEVICE_GET`` command to get device info or + dump list of available devices + ``DPLL_A_ID`` attr unique dpll device ID + ``DPLL_A_MODULE_NAME`` attr module name of registerer + ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_MODE`` attr selection mode + ``DPLL_A_MODE_SUPPORTED`` attr available selection modes + ``DPLL_A_LOCK_STATUS`` attr dpll device lock status + ``DPLL_A_TEMP`` attr device temperature info + ``DPLL_A_TYPE`` attr type of dpll device + ==================================== ================================= + + ==================================== ================================= + ``DPLL_CMD_DEVICE_SET`` command to set dpll device config + ``DPLL_A_ID`` attr internal dpll device index + ``DPLL_A_MODE`` attr selection mode to configure + ==================================== ================================= + +Constants identifying command types for pins uses a +``DPLL_CMD_PIN_`` prefix and suffix according to command purpose. +The pin related attributes use a ``DPLL_A_PIN_`` prefix and suffix +according to attribute purpose. 
+ + ==================================== ================================= + ``DPLL_CMD_PIN_ID_GET`` command to get pin ID + ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer + ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided + by registerer + ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided + by registerer + ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided + by registerer + ``DPLL_A_PIN_TYPE`` attr type of a pin + ==================================== ================================= + + ==================================== ================================== + ``DPLL_CMD_PIN_GET`` command to get pin info or dump + list of available pins + ``DPLL_A_PIN_ID`` attr unique a pin ID + ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer + ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided + by registerer + ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided + by registerer + ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided + by registerer + ``DPLL_A_PIN_TYPE`` attr type of a pin + ``DPLL_A_PIN_FREQUENCY`` attr current frequency of a pin + ``DPLL_A_PIN_FREQUENCY_SUPPORTED`` nested attr provides supported + frequencies + ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency + ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency + ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase + adjustment + ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase + adjustment + ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase + adjustment on parent device + ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent device + the pin is connected with + ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id + ``DPLL_A_PIN_PRIO`` attr priority of pin on the + dpll device + 
``DPLL_A_PIN_STATE`` attr state of pin on the parent + dpll device + ``DPLL_A_PIN_DIRECTION`` attr direction of a pin on the + parent dpll device + ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference + between a pin and parent dpll + ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin + the pin is connected with + ``DPLL_A_PIN_PARENT_ID`` attr parent pin id + ``DPLL_A_PIN_STATE`` attr state of pin on the parent + pin + ``DPLL_A_PIN_CAPABILITIES`` attr bitmask of pin capabilities + ==================================== ================================== + + ==================================== ================================= + ``DPLL_CMD_PIN_SET`` command to set pins configuration + ``DPLL_A_PIN_ID`` attr unique a pin ID + ``DPLL_A_PIN_FREQUENCY`` attr requested frequency of a pin + ``DPLL_A_PIN_PHASE_ADJUST`` attr requested value of phase + adjustment on parent device + ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent dpll + device configuration request + ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id + ``DPLL_A_PIN_DIRECTION`` attr requested direction of a pin + ``DPLL_A_PIN_PRIO`` attr requested priority of pin on + the dpll device + ``DPLL_A_PIN_STATE`` attr requested state of pin on + the dpll device + ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin + configuration request + ``DPLL_A_PIN_PARENT_ID`` attr parent pin id + ``DPLL_A_PIN_STATE`` attr requested state of pin on + parent pin + ==================================== ================================= + +Netlink dump requests +===================== + +The ``DPLL_CMD_DEVICE_GET`` and ``DPLL_CMD_PIN_GET`` commands are +capable of dump type netlink requests, in which case the response is in +the same format as for their ``do`` request, but every device or pin +registered in the system is returned. 
+ +SET commands format +=================== + +``DPLL_CMD_DEVICE_SET`` - to target a dpll device, the user provides +``DPLL_A_ID``, which is unique identifier of dpll device in the system, +as well as parameter being configured (``DPLL_A_MODE``). + +``DPLL_CMD_PIN_SET`` - to target a pin user must provide a +``DPLL_A_PIN_ID``, which is unique identifier of a pin in the system. +Also configured pin parameters must be added. +If ``DPLL_A_PIN_FREQUENCY`` is configured, this affects all the dpll +devices that are connected with the pin, that is why frequency attribute +shall not be enclosed in ``DPLL_A_PIN_PARENT_DEVICE``. +Other attributes: ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE`` or +``DPLL_A_PIN_DIRECTION`` must be enclosed in +``DPLL_A_PIN_PARENT_DEVICE`` as their configuration relates to only one +of parent dplls, targeted by ``DPLL_A_PIN_PARENT_ID`` attribute which is +also required inside that nest. +For MUX-type pins the ``DPLL_A_PIN_STATE`` attribute is configured in +similar way, by enclosing required state in ``DPLL_A_PIN_PARENT_PIN`` +nested attribute and targeted parent pin id in ``DPLL_A_PIN_PARENT_ID``. + +In general, it is possible to configure multiple parameters at once, but +internally each parameter change will be invoked separately, where order +of configuration is not guaranteed by any means. + +Configuration pre-defined enums +=============================== + +.. kernel-doc:: include/uapi/linux/dpll.h + +Notifications +============= + +dpll device can provide notifications regarding status changes of the +device, i.e. lock status changes, input/output changes or other alarms. 
+There is one multicast group that is used to notify user-space apps via +netlink socket: ``DPLL_MCGRP_MONITOR`` + +Notifications messages: + + ============================== ===================================== + ``DPLL_CMD_DEVICE_CREATE_NTF`` dpll device was created + ``DPLL_CMD_DEVICE_DELETE_NTF`` dpll device was deleted + ``DPLL_CMD_DEVICE_CHANGE_NTF`` dpll device has changed + ``DPLL_CMD_PIN_CREATE_NTF`` dpll pin was created + ``DPLL_CMD_PIN_DELETE_NTF`` dpll pin was deleted + ``DPLL_CMD_PIN_CHANGE_NTF`` dpll pin has changed + ============================== ===================================== + +Events format is the same as for the corresponding get command. +Format of ``DPLL_CMD_DEVICE_`` events is the same as response of +``DPLL_CMD_DEVICE_GET``. +Format of ``DPLL_CMD_PIN_`` events is same as response of +``DPLL_CMD_PIN_GET``. + +Device driver implementation +============================ + +Device is allocated by dpll_device_get() call. Second call with the +same arguments will not create new object but provides pointer to +previously created device for given arguments, it also increases +refcount of that object. +Device is deallocated by dpll_device_put() call, which first +decreases the refcount, once refcount is cleared the object is +destroyed. + +Device should implement set of operations and register device via +dpll_device_register() at which point it becomes available to the +users. Multiple driver instances can obtain reference to it with +dpll_device_get(), as well as register dpll device with their own +ops and priv. + +The pins are allocated separately with dpll_pin_get(), it works +similarly to dpll_device_get(). Function first creates object and then +for each call with the same arguments only the object refcount +increases. Also dpll_pin_put() works similarly to dpll_device_put(). + +A pin can be registered with parent dpll device or parent pin, depending +on hardware needs. 
Each registration requires registerer to provide set +of pin callbacks, and private data pointer for calling them: + +- dpll_pin_register() - register pin with a dpll device, +- dpll_pin_on_pin_register() - register pin with another MUX type pin. + +Notifications of adding or removing dpll devices are created within +subsystem itself. +Notifications about registering/deregistering pins are also invoked by +the subsystem. +Notifications about status changes either of dpll device or a pin are +invoked in two ways: + +- after successful change was requested on dpll subsystem, the subsystem + calls corresponding notification, +- requested by device driver with dpll_device_change_ntf() or + dpll_pin_change_ntf() when driver informs about the status change. + +The device driver using dpll interface is not required to implement all +the callback operation. Nevertheless, there are few required to be +implemented. +Required dpll device level callback operations: + +- ``.mode_get``, +- ``.lock_status_get``. + +Required pin level callback operations: + +- ``.state_on_dpll_get`` (pins registered with dpll device), +- ``.state_on_pin_get`` (pins registered with parent pin), +- ``.direction_get``. + +Every other operation handler is checked for existence and +``-EOPNOTSUPP`` is returned in case of absence of specific handler. + +The simplest implementation is in the OCP TimeCard driver. The ops +structures are defined like this: + +.. 
code-block:: c + + static const struct dpll_device_ops dpll_ops = { + .lock_status_get = ptp_ocp_dpll_lock_status_get, + .mode_get = ptp_ocp_dpll_mode_get, + .mode_supported = ptp_ocp_dpll_mode_supported, + }; + + static const struct dpll_pin_ops dpll_pins_ops = { + .frequency_get = ptp_ocp_dpll_frequency_get, + .frequency_set = ptp_ocp_dpll_frequency_set, + .direction_get = ptp_ocp_dpll_direction_get, + .direction_set = ptp_ocp_dpll_direction_set, + .state_on_dpll_get = ptp_ocp_dpll_state_get, + }; + +The registration part then looks like this: + +.. code-block:: c + + clkid = pci_get_dsn(pdev); + bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE); + if (IS_ERR(bp->dpll)) { + err = PTR_ERR(bp->dpll); + dev_err(&pdev->dev, "dpll_device_alloc failed\n"); + goto out; + } + + err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp); + if (err) + goto out; + + for (i = 0; i < OCP_SMA_NUM; i++) { + bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop); + if (IS_ERR(bp->sma[i].dpll_pin)) { + err = PTR_ERR(bp->dpll); + goto out_dpll; + } + + err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, + &bp->sma[i]); + if (err) { + dpll_pin_put(bp->sma[i].dpll_pin); + goto out_dpll; + } + } + +In the error path we have to rewind every allocation in the reverse order: + +.. code-block:: c + + while (i) { + --i; + dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]); + dpll_pin_put(bp->sma[i].dpll_pin); + } + dpll_device_put(bp->dpll); + +More complex example can be found in Intel's ICE driver or nVidia's mlx5 driver. + +SyncE enablement +================ +For SyncE enablement it is required to allow control over dpll device +for a software application which monitors and configures the inputs of +dpll device in response to current state of a dpll device and its +inputs. 
+In such scenario, dpll device input signal shall be also configurable +to drive dpll with signal recovered from the PHY netdevice. +This is done by exposing a pin to the netdevice - attaching pin to the +netdevice itself with +``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. +Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user +as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in +nested attribute ``IFLA_DPLL_PIN``. diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 8be086b3f8..c5f99d834e 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -322,10 +322,8 @@ IOMAP devm_platform_ioremap_resource_byname() devm_platform_get_and_ioremap_resource() devm_iounmap() - pcim_iomap() - pcim_iomap_regions() : do request_region() and iomap() on multiple BARs - pcim_iomap_table() : array of mapped addresses indexed by BAR - pcim_iounmap() + + Note: For the PCI devices the specific pcim_*() functions may be used, see below. 
IRQ devm_free_irq() @@ -392,8 +390,16 @@ PCI devm_pci_alloc_host_bridge() : managed PCI host bridge allocation devm_pci_remap_cfgspace() : ioremap PCI configuration space devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource + pcim_enable_device() : after success, all PCI ops become managed + pcim_iomap() : do iomap() on a single BAR + pcim_iomap_regions() : do request_region() and iomap() on multiple BARs + pcim_iomap_regions_request_all() : do request_region() on all and iomap() on multiple BARs + pcim_iomap_table() : array of mapped addresses indexed by BAR + pcim_iounmap() : do iounmap() on a single BAR + pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs pcim_pin_device() : keep PCI device enabled after release + pcim_set_mwi() : enable Memory-Write-Invalidate PCI transaction PHY devm_usb_get_phy() diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index de6fc79ad6..3e588b9d67 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -29,6 +29,10 @@ warnings. These stubs are used for two use cases: will use it under other compile-time configurations. In this case the consumer must make sure not to call into these functions, or the user will be met with console warnings that may be perceived as intimidating. + Combining truly optional GPIOLIB usage with calls to + ``[devm_]gpiod_get_optional()`` is a *bad idea*, and will result in weird + error messages. Use the ordinary getter functions with optional GPIOLIB: + some open coding of error handling should be expected when you do this. All the functions that work with the descriptor-based GPIO interface are prefixed with ``gpiod_``. 
The ``gpio_`` prefix is used for the legacy diff --git a/Documentation/driver-api/i3c/protocol.rst b/Documentation/driver-api/i3c/protocol.rst index 02653defa0..23a0b93c62 100644 --- a/Documentation/driver-api/i3c/protocol.rst +++ b/Documentation/driver-api/i3c/protocol.rst @@ -71,8 +71,8 @@ During DAA, each I3C device reports 3 important things: related capabilities * DCR: Device Characteristic Register. This 8-bit register describes the functionalities provided by the device -* Provisional ID: A 48-bit unique identifier. On a given bus there should be no - Provisional ID collision, otherwise the discovery mechanism may fail. +* Provisioned ID: A 48-bit unique identifier. On a given bus there should be no + Provisioned ID collision, otherwise the discovery mechanism may fail. I3C slave events ================ diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 1e16a40da3..f549a68951 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -114,6 +114,7 @@ available subsections can be seen below. zorro hte/index wmi + dpll .. only:: subproject and html diff --git a/Documentation/driver-api/media/camera-sensor.rst b/Documentation/driver-api/media/camera-sensor.rst index 93f4f2536c..6456145f96 100644 --- a/Documentation/driver-api/media/camera-sensor.rst +++ b/Documentation/driver-api/media/camera-sensor.rst @@ -1,8 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _media_writing_camera_sensor_drivers: + Writing camera sensor drivers ============================= +This document covers the in-kernel APIs only. For the best practices on +userspace API implementation in camera sensor drivers, please see +:ref:`media_using_camera_sensor_drivers`. + CSI-2 and parallel (BT.601 and BT.656) busses --------------------------------------------- @@ -13,7 +19,7 @@ Handling clocks Camera sensors have an internal clock tree including a PLL and a number of divisors. 
The clock tree is generally configured by the driver based on a few -input parameters that are specific to the hardware:: the external clock frequency +input parameters that are specific to the hardware: the external clock frequency and the link frequency. The two parameters generally are obtained from system firmware. **No other frequencies should be used in any circumstances.** @@ -32,110 +38,61 @@ can rely on this frequency being used. Devicetree ~~~~~~~~~~ -The currently preferred way to achieve this is using ``assigned-clocks``, -``assigned-clock-parents`` and ``assigned-clock-rates`` properties. See -``Documentation/devicetree/bindings/clock/clock-bindings.txt`` for more -information. The driver then gets the frequency using ``clk_get_rate()``. +The preferred way to achieve this is using ``assigned-clocks``, +``assigned-clock-parents`` and ``assigned-clock-rates`` properties. See the +`clock device tree bindings +`_ +for more information. The driver then gets the frequency using +``clk_get_rate()``. This approach has the drawback that there's no guarantee that the frequency hasn't been modified directly or indirectly by another driver, or supported by the board's clock tree to begin with. Changes to the Common Clock Framework API are required to ensure reliability. -Frame size ----------- - -There are two distinct ways to configure the frame size produced by camera -sensors. - -Freely configurable camera sensor drivers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Freely configurable camera sensor drivers expose the device's internal -processing pipeline as one or more sub-devices with different cropping and -scaling configurations. The output size of the device is the result of a series -of cropping and scaling operations from the device's pixel array's size. - -An example of such a driver is the CCS driver (see ``drivers/media/i2c/ccs``). 
- -Register list based drivers -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Register list based drivers generally, instead of able to configure the device -they control based on user requests, are limited to a number of preset -configurations that combine a number of different parameters that on hardware -level are independent. How a driver picks such configuration is based on the -format set on a source pad at the end of the device's internal pipeline. - -Most sensor drivers are implemented this way, see e.g. -``drivers/media/i2c/imx319.c`` for an example. - -Frame interval configuration ----------------------------- - -There are two different methods for obtaining possibilities for different frame -intervals as well as configuring the frame interval. Which one to implement -depends on the type of the device. - -Raw camera sensors -~~~~~~~~~~~~~~~~~~ - -Instead of a high level parameter such as frame interval, the frame interval is -a result of the configuration of a number of camera sensor implementation -specific parameters. Luckily, these parameters tend to be the same for more or -less all modern raw camera sensors. - -The frame interval is calculated using the following equation:: - - frame interval = (analogue crop width + horizontal blanking) * - (analogue crop height + vertical blanking) / pixel rate - -The formula is bus independent and is applicable for raw timing parameters on -large variety of devices beyond camera sensors. Devices that have no analogue -crop, use the full source image size, i.e. pixel array size. - -Horizontal and vertical blanking are specified by ``V4L2_CID_HBLANK`` and -``V4L2_CID_VBLANK``, respectively. The unit of the ``V4L2_CID_HBLANK`` control -is pixels and the unit of the ``V4L2_CID_VBLANK`` is lines. The pixel rate in -the sensor's **pixel array** is specified by ``V4L2_CID_PIXEL_RATE`` in the same -sub-device. The unit of that control is pixels per second. 
- -Register list based drivers need to implement read-only sub-device nodes for the -purpose. Devices that are not register list based need these to configure the -device's internal processing pipeline. - -The first entity in the linear pipeline is the pixel array. The pixel array may -be followed by other entities that are there to allow configuring binning, -skipping, scaling or digital crop :ref:`v4l2-subdev-selections`. - -USB cameras etc. devices -~~~~~~~~~~~~~~~~~~~~~~~~ - -USB video class hardware, as well as many cameras offering a similar higher -level interface natively, generally use the concept of frame interval (or frame -rate) on device level in firmware or hardware. This means lower level controls -implemented by raw cameras may not be used on uAPI (or even kAPI) to control the -frame interval on these devices. - Power management ---------------- -Always use runtime PM to manage the power states of your device. Camera sensor -drivers are in no way special in this respect: they are responsible for -controlling the power state of the device they otherwise control as well. In -general, the device must be powered on at least when its registers are being -accessed and when it is streaming. - -Existing camera sensor drivers may rely on the old -struct v4l2_subdev_core_ops->s_power() callback for bridge or ISP drivers to -manage their power state. This is however **deprecated**. If you feel you need -to begin calling an s_power from an ISP or a bridge driver, instead please add -runtime PM support to the sensor driver you are using. Likewise, new drivers -should not use s_power. - -Please see examples in e.g. ``drivers/media/i2c/ov8856.c`` and -``drivers/media/i2c/ccs/ccs-core.c``. The two drivers work in both ACPI -and DT based systems. +Camera sensors are used in conjunction with other devices to form a camera +pipeline. They must obey the rules listed herein to ensure coherent power +management over the pipeline. 
+ +Camera sensor drivers are responsible for controlling the power state of the +device they otherwise control as well. They shall use runtime PM to manage +power states. Runtime PM shall be enabled at probe time and disabled at remove +time. Drivers should enable runtime PM autosuspend. + +The runtime PM handlers shall handle clocks, regulators, GPIOs, and other +system resources required to power the sensor up and down. For drivers that +don't use any of those resources (such as drivers that support ACPI systems +only), the runtime PM handlers may be left unimplemented. + +In general, the device shall be powered on at least when its registers are +being accessed and when it is streaming. Drivers should use +``pm_runtime_resume_and_get()`` when starting streaming and +``pm_runtime_put()`` or ``pm_runtime_put_autosuspend()`` when stopping +streaming. They may power the device up at probe time (for example to read +identification registers), but should not keep it powered unconditionally after +probe. + +At system suspend time, the whole camera pipeline must stop streaming, and +restart when the system is resumed. This requires coordination between the +camera sensor and the rest of the camera pipeline. Bridge drivers are +responsible for this coordination, and instruct camera sensors to stop and +restart streaming by calling the appropriate subdev operations +(``.s_stream()``, ``.enable_streams()`` or ``.disable_streams()``). Camera +sensor drivers shall therefore **not** keep track of the streaming state to +stop streaming in the PM suspend handler and restart it in the resume handler. +Drivers should in general not implement the system PM handlers. + +Camera sensor drivers shall **not** implement the subdev ``.s_power()`` +operation, as it is deprecated. While this operation is implemented in some +existing drivers as they predate the deprecation, new drivers shall use runtime +PM instead. 
If you feel you need to begin calling ``.s_power()`` from an ISP or +a bridge driver, instead add runtime PM support to the sensor driver you are +using and drop its ``.s_power()`` handler. + +Please also see :ref:`examples `. Control framework ~~~~~~~~~~~~~~~~~ @@ -155,21 +112,36 @@ access the device. Rotation, orientation and flipping ---------------------------------- -Some systems have the camera sensor mounted upside down compared to its natural -mounting rotation. In such cases, drivers shall expose the information to -userspace with the :ref:`V4L2_CID_CAMERA_SENSOR_ROTATION -` control. - -Sensor drivers shall also report the sensor's mounting orientation with the -:ref:`V4L2_CID_CAMERA_SENSOR_ORIENTATION `. - Use ``v4l2_fwnode_device_parse()`` to obtain rotation and orientation information from system firmware and ``v4l2_ctrl_new_fwnode_properties()`` to register the appropriate controls. -Sensor drivers that have any vertical or horizontal flips embedded in the -register programming sequences shall initialize the V4L2_CID_HFLIP and -V4L2_CID_VFLIP controls with the values programmed by the register sequences. -The default values of these controls shall be 0 (disabled). Especially these -controls shall not be inverted, independently of the sensor's mounting -rotation. +.. _media-camera-sensor-examples: + +Example drivers +--------------- + +Features implemented by sensor drivers vary, and depending on the set of +supported features and other qualities, particular sensor drivers better serve +the purpose of an example. The following drivers are known to be good examples: + +.. 
flat-table:: Example sensor drivers + :header-rows: 0 + :widths: 1 1 1 2 + + * - Driver name + - File(s) + - Driver type + - Example topic + * - CCS + - ``drivers/media/i2c/ccs/`` + - Freely configurable + - Power management (ACPI and DT), UAPI + * - imx219 + - ``drivers/media/i2c/imx219.c`` + - Register list based + - Power management (DT), UAPI, mode selection + * - imx319 + - ``drivers/media/i2c/imx319.c`` + - Register list based + - Power management (ACPI and DT) diff --git a/Documentation/driver-api/media/drivers/ccs/ccs.rst b/Documentation/driver-api/media/drivers/ccs/ccs.rst index 7389204afc..776eec72bc 100644 --- a/Documentation/driver-api/media/drivers/ccs/ccs.rst +++ b/Documentation/driver-api/media/drivers/ccs/ccs.rst @@ -30,7 +30,7 @@ that purpose, selection target ``V4L2_SEL_TGT_COMPOSE`` is supported on the sink pad (0). Additionally, if a device has no scaler or digital crop functionality, the -source pad (1) expses another digital crop selection rectangle that can only +source pad (1) exposes another digital crop selection rectangle that can only crop at the end of the lines and frames. Scaler @@ -78,6 +78,14 @@ For SMIA (non-++) compliant devices the static data file name is vvvv or vv denotes MIPI and SMIA manufacturer IDs respectively, mmmm model ID and rrrr or rr revision number. +CCS tools +~~~~~~~~~ + +`CCS tools `_ is a set of +tools for working with CCS static data files. CCS tools includes a +definition of the human-readable CCS static data YAML format and includes a +program to convert it to a binary. 
+ Register definition generator ----------------------------- diff --git a/Documentation/driver-api/media/v4l2-core.rst b/Documentation/driver-api/media/v4l2-core.rst index 239045ecc8..58cba831ad 100644 --- a/Documentation/driver-api/media/v4l2-core.rst +++ b/Documentation/driver-api/media/v4l2-core.rst @@ -13,7 +13,6 @@ Video4Linux devices v4l2-subdev v4l2-event v4l2-controls - v4l2-videobuf v4l2-videobuf2 v4l2-dv-timings v4l2-flash-led-class diff --git a/Documentation/driver-api/media/v4l2-dev.rst b/Documentation/driver-api/media/v4l2-dev.rst index 99e3b5fa74..d5cb19b21a 100644 --- a/Documentation/driver-api/media/v4l2-dev.rst +++ b/Documentation/driver-api/media/v4l2-dev.rst @@ -157,14 +157,6 @@ changing the e.g. exposure of the webcam. Of course, you can always do all the locking yourself by leaving both lock pointers at ``NULL``. -If you use the old :ref:`videobuf framework ` then you must -pass the :c:type:`video_device`->lock to the videobuf queue initialize -function: if videobuf has to wait for a frame to arrive, then it will -temporarily unlock the lock and relock it afterwards. If your driver also -waits in the code, then you should do the same to allow other -processes to access the device node while the first process is waiting for -something. - In the case of :ref:`videobuf2 ` you will need to implement the ``wait_prepare()`` and ``wait_finish()`` callbacks to unlock/lock if applicable. If you use the ``queue->lock`` pointer, then you can use the helper functions diff --git a/Documentation/driver-api/media/v4l2-videobuf.rst b/Documentation/driver-api/media/v4l2-videobuf.rst deleted file mode 100644 index 4b1d84eefe..0000000000 --- a/Documentation/driver-api/media/v4l2-videobuf.rst +++ /dev/null @@ -1,403 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. _vb_framework: - -Videobuf Framework -================== - -Author: Jonathan Corbet - -Current as of 2.6.33 - -.. note:: - - The videobuf framework was deprecated in favor of videobuf2. 
Shouldn't - be used on new drivers. - -Introduction ------------- - -The videobuf layer functions as a sort of glue layer between a V4L2 driver -and user space. It handles the allocation and management of buffers for -the storage of video frames. There is a set of functions which can be used -to implement many of the standard POSIX I/O system calls, including read(), -poll(), and, happily, mmap(). Another set of functions can be used to -implement the bulk of the V4L2 ioctl() calls related to streaming I/O, -including buffer allocation, queueing and dequeueing, and streaming -control. Using videobuf imposes a few design decisions on the driver -author, but the payback comes in the form of reduced code in the driver and -a consistent implementation of the V4L2 user-space API. - -Buffer types ------------- - -Not all video devices use the same kind of buffers. In fact, there are (at -least) three common variations: - - - Buffers which are scattered in both the physical and (kernel) virtual - address spaces. (Almost) all user-space buffers are like this, but it - makes great sense to allocate kernel-space buffers this way as well when - it is possible. Unfortunately, it is not always possible; working with - this kind of buffer normally requires hardware which can do - scatter/gather DMA operations. - - - Buffers which are physically scattered, but which are virtually - contiguous; buffers allocated with vmalloc(), in other words. These - buffers are just as hard to use for DMA operations, but they can be - useful in situations where DMA is not available but virtually-contiguous - buffers are convenient. - - - Buffers which are physically contiguous. Allocation of this kind of - buffer can be unreliable on fragmented systems, but simpler DMA - controllers cannot deal with anything else. - -Videobuf can work with all three types of buffers, but the driver author -must pick one at the outset and design the driver around that decision. 
- -[It's worth noting that there's a fourth kind of buffer: "overlay" buffers -which are located within the system's video memory. The overlay -functionality is considered to be deprecated for most use, but it still -shows up occasionally in system-on-chip drivers where the performance -benefits merit the use of this technique. Overlay buffers can be handled -as a form of scattered buffer, but there are very few implementations in -the kernel and a description of this technique is currently beyond the -scope of this document.] - -Data structures, callbacks, and initialization ----------------------------------------------- - -Depending on which type of buffers are being used, the driver should -include one of the following files: - -.. code-block:: none - - /* Physically scattered */ - /* vmalloc() buffers */ - /* Physically contiguous */ - -The driver's data structure describing a V4L2 device should include a -struct videobuf_queue instance for the management of the buffer queue, -along with a list_head for the queue of available buffers. There will also -need to be an interrupt-safe spinlock which is used to protect (at least) -the queue. - -The next step is to write four simple callbacks to help videobuf deal with -the management of buffers: - -.. code-block:: none - - struct videobuf_queue_ops { - int (*buf_setup)(struct videobuf_queue *q, - unsigned int *count, unsigned int *size); - int (*buf_prepare)(struct videobuf_queue *q, - struct videobuf_buffer *vb, - enum v4l2_field field); - void (*buf_queue)(struct videobuf_queue *q, - struct videobuf_buffer *vb); - void (*buf_release)(struct videobuf_queue *q, - struct videobuf_buffer *vb); - }; - -buf_setup() is called early in the I/O process, when streaming is being -initiated; its purpose is to tell videobuf about the I/O stream. The count -parameter will be a suggested number of buffers to use; the driver should -check it for rationality and adjust it if need be. 
As a practical rule, a -minimum of two buffers are needed for proper streaming, and there is -usually a maximum (which cannot exceed 32) which makes sense for each -device. The size parameter should be set to the expected (maximum) size -for each frame of data. - -Each buffer (in the form of a struct videobuf_buffer pointer) will be -passed to buf_prepare(), which should set the buffer's size, width, height, -and field fields properly. If the buffer's state field is -VIDEOBUF_NEEDS_INIT, the driver should pass it to: - -.. code-block:: none - - int videobuf_iolock(struct videobuf_queue* q, struct videobuf_buffer *vb, - struct v4l2_framebuffer *fbuf); - -Among other things, this call will usually allocate memory for the buffer. -Finally, the buf_prepare() function should set the buffer's state to -VIDEOBUF_PREPARED. - -When a buffer is queued for I/O, it is passed to buf_queue(), which should -put it onto the driver's list of available buffers and set its state to -VIDEOBUF_QUEUED. Note that this function is called with the queue spinlock -held; if it tries to acquire it as well things will come to a screeching -halt. Yes, this is the voice of experience. Note also that videobuf may -wait on the first buffer in the queue; placing other buffers in front of it -could again gum up the works. So use list_add_tail() to enqueue buffers. - -Finally, buf_release() is called when a buffer is no longer intended to be -used. The driver should ensure that there is no I/O active on the buffer, -then pass it to the appropriate free routine(s): - -.. 
code-block:: none - - /* Scatter/gather drivers */ - int videobuf_dma_unmap(struct videobuf_queue *q, - struct videobuf_dmabuf *dma); - int videobuf_dma_free(struct videobuf_dmabuf *dma); - - /* vmalloc drivers */ - void videobuf_vmalloc_free (struct videobuf_buffer *buf); - - /* Contiguous drivers */ - void videobuf_dma_contig_free(struct videobuf_queue *q, - struct videobuf_buffer *buf); - -One way to ensure that a buffer is no longer under I/O is to pass it to: - -.. code-block:: none - - int videobuf_waiton(struct videobuf_buffer *vb, int non_blocking, int intr); - -Here, vb is the buffer, non_blocking indicates whether non-blocking I/O -should be used (it should be zero in the buf_release() case), and intr -controls whether an interruptible wait is used. - -File operations ---------------- - -At this point, much of the work is done; much of the rest is slipping -videobuf calls into the implementation of the other driver callbacks. The -first step is in the open() function, which must initialize the -videobuf queue. The function to use depends on the type of buffer used: - -.. 
code-block:: none - - void videobuf_queue_sg_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, - struct device *dev, - spinlock_t *irqlock, - enum v4l2_buf_type type, - enum v4l2_field field, - unsigned int msize, - void *priv); - - void videobuf_queue_vmalloc_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, - struct device *dev, - spinlock_t *irqlock, - enum v4l2_buf_type type, - enum v4l2_field field, - unsigned int msize, - void *priv); - - void videobuf_queue_dma_contig_init(struct videobuf_queue *q, - struct videobuf_queue_ops *ops, - struct device *dev, - spinlock_t *irqlock, - enum v4l2_buf_type type, - enum v4l2_field field, - unsigned int msize, - void *priv); - -In each case, the parameters are the same: q is the queue structure for the -device, ops is the set of callbacks as described above, dev is the device -structure for this video device, irqlock is an interrupt-safe spinlock to -protect access to the data structures, type is the buffer type used by the -device (cameras will use V4L2_BUF_TYPE_VIDEO_CAPTURE, for example), field -describes which field is being captured (often V4L2_FIELD_NONE for -progressive devices), msize is the size of any containing structure used -around struct videobuf_buffer, and priv is a private data pointer which -shows up in the priv_data field of struct videobuf_queue. Note that these -are void functions which, evidently, are immune to failure. - -V4L2 capture drivers can be written to support either of two APIs: the -read() system call and the rather more complicated streaming mechanism. As -a general rule, it is necessary to support both to ensure that all -applications have a chance of working with the device. Videobuf makes it -easy to do that with the same code. To implement read(), the driver need -only make a call to one of: - -.. 
code-block:: none - - ssize_t videobuf_read_one(struct videobuf_queue *q, - char __user *data, size_t count, - loff_t *ppos, int nonblocking); - - ssize_t videobuf_read_stream(struct videobuf_queue *q, - char __user *data, size_t count, - loff_t *ppos, int vbihack, int nonblocking); - -Either one of these functions will read frame data into data, returning the -amount actually read; the difference is that videobuf_read_one() will only -read a single frame, while videobuf_read_stream() will read multiple frames -if they are needed to satisfy the count requested by the application. A -typical driver read() implementation will start the capture engine, call -one of the above functions, then stop the engine before returning (though a -smarter implementation might leave the engine running for a little while in -anticipation of another read() call happening in the near future). - -The poll() function can usually be implemented with a direct call to: - -.. code-block:: none - - unsigned int videobuf_poll_stream(struct file *file, - struct videobuf_queue *q, - poll_table *wait); - -Note that the actual wait queue eventually used will be the one associated -with the first available buffer. - -When streaming I/O is done to kernel-space buffers, the driver must support -the mmap() system call to enable user space to access the data. In many -V4L2 drivers, the often-complex mmap() implementation simplifies to a -single call to: - -.. code-block:: none - - int videobuf_mmap_mapper(struct videobuf_queue *q, - struct vm_area_struct *vma); - -Everything else is handled by the videobuf code. - -The release() function requires two separate videobuf calls: - -.. code-block:: none - - void videobuf_stop(struct videobuf_queue *q); - int videobuf_mmap_free(struct videobuf_queue *q); - -The call to videobuf_stop() terminates any I/O in progress - though it is -still up to the driver to stop the capture engine. 
The call to -videobuf_mmap_free() will ensure that all buffers have been unmapped; if -so, they will all be passed to the buf_release() callback. If buffers -remain mapped, videobuf_mmap_free() returns an error code instead. The -purpose is clearly to cause the closing of the file descriptor to fail if -buffers are still mapped, but every driver in the 2.6.32 kernel cheerfully -ignores its return value. - -ioctl() operations ------------------- - -The V4L2 API includes a very long list of driver callbacks to respond to -the many ioctl() commands made available to user space. A number of these -- those associated with streaming I/O - turn almost directly into videobuf -calls. The relevant helper functions are: - -.. code-block:: none - - int videobuf_reqbufs(struct videobuf_queue *q, - struct v4l2_requestbuffers *req); - int videobuf_querybuf(struct videobuf_queue *q, struct v4l2_buffer *b); - int videobuf_qbuf(struct videobuf_queue *q, struct v4l2_buffer *b); - int videobuf_dqbuf(struct videobuf_queue *q, struct v4l2_buffer *b, - int nonblocking); - int videobuf_streamon(struct videobuf_queue *q); - int videobuf_streamoff(struct videobuf_queue *q); - -So, for example, a VIDIOC_REQBUFS call turns into a call to the driver's -vidioc_reqbufs() callback which, in turn, usually only needs to locate the -proper struct videobuf_queue pointer and pass it to videobuf_reqbufs(). -These support functions can replace a great deal of buffer management -boilerplate in a lot of V4L2 drivers. - -The vidioc_streamon() and vidioc_streamoff() functions will be a bit more -complex, of course, since they will also need to deal with starting and -stopping the capture engine. - -Buffer allocation ------------------ - -Thus far, we have talked about buffers, but have not looked at how they are -allocated. The scatter/gather case is the most complex on this front. 
For -allocation, the driver can leave buffer allocation entirely up to the -videobuf layer; in this case, buffers will be allocated as anonymous -user-space pages and will be very scattered indeed. If the application is -using user-space buffers, no allocation is needed; the videobuf layer will -take care of calling get_user_pages() and filling in the scatterlist array. - -If the driver needs to do its own memory allocation, it should be done in -the vidioc_reqbufs() function, *after* calling videobuf_reqbufs(). The -first step is a call to: - -.. code-block:: none - - struct videobuf_dmabuf *videobuf_to_dma(struct videobuf_buffer *buf); - -The returned videobuf_dmabuf structure (defined in -) includes a couple of relevant fields: - -.. code-block:: none - - struct scatterlist *sglist; - int sglen; - -The driver must allocate an appropriately-sized scatterlist array and -populate it with pointers to the pieces of the allocated buffer; sglen -should be set to the length of the array. - -Drivers using the vmalloc() method need not (and cannot) concern themselves -with buffer allocation at all; videobuf will handle those details. The -same is normally true of contiguous-DMA drivers as well; videobuf will -allocate the buffers (with dma_alloc_coherent()) when it sees fit. That -means that these drivers may be trying to do high-order allocations at any -time, an operation which is not always guaranteed to work. Some drivers -play tricks by allocating DMA space at system boot time; videobuf does not -currently play well with those drivers. - -As of 2.6.31, contiguous-DMA drivers can work with a user-supplied buffer, -as long as that buffer is physically contiguous. Normal user-space -allocations will not meet that criterion, but buffers obtained from other -kernel drivers, or those contained within huge pages, will work with these -drivers. 
- -Filling the buffers -------------------- - -The final part of a videobuf implementation has no direct callback - it's -the portion of the code which actually puts frame data into the buffers, -usually in response to interrupts from the device. For all types of -drivers, this process works approximately as follows: - - - Obtain the next available buffer and make sure that somebody is actually - waiting for it. - - - Get a pointer to the memory and put video data there. - - - Mark the buffer as done and wake up the process waiting for it. - -Step (1) above is done by looking at the driver-managed list_head structure -- the one which is filled in the buf_queue() callback. Because starting -the engine and enqueueing buffers are done in separate steps, it's possible -for the engine to be running without any buffers available - in the -vmalloc() case especially. So the driver should be prepared for the list -to be empty. It is equally possible that nobody is yet interested in the -buffer; the driver should not remove it from the list or fill it until a -process is waiting on it. That test can be done by examining the buffer's -done field (a wait_queue_head_t structure) with waitqueue_active(). - -A buffer's state should be set to VIDEOBUF_ACTIVE before being mapped for -DMA; that ensures that the videobuf layer will not try to do anything with -it while the device is transferring data. - -For scatter/gather drivers, the needed memory pointers will be found in the -scatterlist structure described above. Drivers using the vmalloc() method -can get a memory pointer with: - -.. code-block:: none - - void *videobuf_to_vmalloc(struct videobuf_buffer *buf); - -For contiguous DMA drivers, the function to use is: - -.. code-block:: none - - dma_addr_t videobuf_to_dma_contig(struct videobuf_buffer *buf); - -The contiguous DMA API goes out of its way to hide the kernel-space address -of the DMA buffer from drivers. 
- -The final step is to set the size field of the relevant videobuf_buffer -structure to the actual size of the captured image, set state to -VIDEOBUF_DONE, then call wake_up() on the done queue. At this point, the -buffer is owned by the videobuf layer and the driver should not touch it -again. - -Developers who are interested in more information can go into the relevant -header files; there are a few low-level functions declared there which have -not been talked about here. Note also that all of these calls are exported -GPL-only, so they will not be available to non-GPL kernel modules. diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst index 2d6b99766e..78dded03e5 100644 --- a/Documentation/driver-api/pps.rst +++ b/Documentation/driver-api/pps.rst @@ -200,11 +200,17 @@ Generators Sometimes one needs to be able not only to catch PPS signals but to produce them also. For example, running a distributed simulation, which requires -computers' clock to be synchronized very tightly. One way to do this is to -invent some complicated hardware solutions but it may be neither necessary -nor affordable. The cheap way is to load a PPS generator on one of the -computers (master) and PPS clients on others (slaves), and use very simple -cables to deliver signals using parallel ports, for example. +computers' clock to be synchronized very tightly. + + +Parallel port generator +------------------------ + +One way to do this is to invent some complicated hardware solutions but it +may be neither necessary nor affordable. The cheap way is to load a PPS +generator on one of the computers (master) and PPS clients on others +(slaves), and use very simple cables to deliver signals using parallel +ports, for example. 
Parallel port cable pinout:: diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst index 3fdc95f7a1..bb264490a8 100644 --- a/Documentation/driver-api/pwm.rst +++ b/Documentation/driver-api/pwm.rst @@ -111,13 +111,13 @@ channel that was exported. The following properties will then be available: duty_cycle The active time of the PWM signal (read/write). - Value is in nanoseconds and must be less than the period. + Value is in nanoseconds and must be less than or equal to the period. polarity Changes the polarity of the PWM signal (read/write). Writes to this property only work if the PWM chip supports changing - the polarity. The polarity can only be changed if the PWM is not - enabled. Value is the string "normal" or "inversed". + the polarity. + Value is the string "normal" or "inversed". enable Enable/disable the PWM signal (read/write). diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst index 9ab4316322..8fb8c5b2d6 100644 --- a/Documentation/driver-api/thermal/intel_dptf.rst +++ b/Documentation/driver-api/thermal/intel_dptf.rst @@ -164,6 +164,16 @@ ABI. ``power_limit_1_tmax_us`` (RO) Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL +``power_floor_status`` (RO) + When set to 1, the power floor of the system in the current + configuration has been reached. It needs to be reconfigured to allow + power to be reduced any further. + +``power_floor_enable`` (RW) + When set to 1, enable reading and notification of the power floor + status. Notifications are triggered for the power_floor_status + attribute value changes. 
+ :file:`/sys/bus/pci/devices/0000\:00\:04.0/` ``tcc_offset_degree_celsius`` (RW) @@ -315,3 +325,57 @@ DPTF Fan Control ---------------------------------------- Refer to Documentation/admin-guide/acpi/fan_performance_states.rst + +Workload Type Hints +---------------------------------------- + +The firmware in Meteor Lake processor generation is capable of identifying +workload type and passing hints regarding it to the OS. A special sysfs +interface is provided to allow user space to obtain workload type hints from +the firmware and control the rate at which they are provided. + +User space can poll attribute "workload_type_index" for the current hint or +can receive a notification whenever the value of this attribute is updated. + +:file:`/sys/bus/pci/devices/0000:00:04.0/workload_hint/` +Segment 0, bus 0, device 4, function 0 is reserved for the processor thermal +device on all Intel client processors. So, the above path doesn't change +based on the processor generation. + +``workload_hint_enable`` (RW) + Enable firmware to send workload type hints to user space. + +``notification_delay_ms`` (RW) + Minimum delay in milliseconds before firmware will notify OS. This is + for the rate control of notifications. This delay is between changing + the workload type prediction in the firmware and notifying the OS about + the change. The default delay is 1024 ms. The delay of 0 is invalid. + The delay is rounded up to the nearest power of 2 to simplify firmware + programming of the delay value. The read of notification_delay_ms + attribute shows the effective value used. + +``workload_type_index`` (RO) + Predicted workload type index. User space can get notification of + change via existing sysfs attribute change notification mechanism. + + The supported index values and their meaning for the Meteor Lake + processor generation are as follows: + + 0 - Idle: System performs no tasks, power and idle residency are + consistently low for long periods of time.
+ + 1 – Battery Life: Power is relatively low, but the processor may + still be actively performing a task, such as video playback for + a long period of time. + + 2 – Sustained: Power level that is relatively high for a long period + of time, with very few to no periods of idleness, which will + eventually exhaust RAPL Power Limit 1 and 2. + + 3 – Bursty: Consumes a relatively constant average amount of power, but + periods of relative idleness are interrupted by bursts of + activity. The bursts are relatively short and the periods of + relative idleness between them typically prevent RAPL Power + Limit 1 from being exhausted. + + 4 – Unknown: Can't classify. diff --git a/Documentation/driver-api/tty/index.rst b/Documentation/driver-api/tty/index.rst index 2d32606a42..b490da11f2 100644 --- a/Documentation/driver-api/tty/index.rst +++ b/Documentation/driver-api/tty/index.rst @@ -36,6 +36,7 @@ In-detail description of the named TTY structures is in separate documents: tty_struct tty_ldisc tty_buffer + tty_ioctl tty_internals Writing TTY Driver diff --git a/Documentation/driver-api/tty/tty_ioctl.rst b/Documentation/driver-api/tty/tty_ioctl.rst new file mode 100644 index 0000000000..3ff1ac5e07 --- /dev/null +++ b/Documentation/driver-api/tty/tty_ioctl.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +TTY IOCTL Helpers +================= + +.. kernel-doc:: drivers/tty/tty_ioctl.c + +.. kernel-doc:: include/linux/tty.h + :identifiers: tty_get_baud_rate diff --git a/Documentation/driver-api/usb/dma.rst b/Documentation/driver-api/usb/dma.rst index d32c27e11b..02f6825ff8 100644 --- a/Documentation/driver-api/usb/dma.rst +++ b/Documentation/driver-api/usb/dma.rst @@ -93,44 +93,18 @@ DMA address space of the device. However, most buffers passed to your driver can safely be used with such DMA mapping. 
(See the first section of Documentation/core-api/dma-api-howto.rst, titled "What memory is DMA-able?") -- When you're using scatterlists, you can map everything at once. On some - systems, this kicks in an IOMMU and turns the scatterlists into single - DMA transactions:: +- When you have the scatterlists which have been mapped for the USB controller, + you could use the new ``usb_sg_*()`` calls, which would turn scatterlist + into URBs:: - int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int nents); + int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev, + unsigned pipe, unsigned period, struct scatterlist *sg, + int nents, size_t length, gfp_t mem_flags); - void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents); + void usb_sg_wait(struct usb_sg_request *io); - void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe, - struct scatterlist *sg, int n_hw_ents); + void usb_sg_cancel(struct usb_sg_request *io); - It's probably easier to use the new ``usb_sg_*()`` calls, which do the DMA - mapping and apply other tweaks to make scatterlist i/o be fast. - -- Some drivers may prefer to work with the model that they're mapping large - buffers, synchronizing their safe re-use. (If there's no re-use, then let - usbcore do the map/unmap.) Large periodic transfers make good examples - here, since it's cheaper to just synchronize the buffer than to unmap it - each time an urb completes and then re-map it on during resubmission. 
- - These calls all work with initialized urbs: ``urb->dev``, ``urb->pipe``, - ``urb->transfer_buffer``, and ``urb->transfer_buffer_length`` must all be - valid when these calls are used (``urb->setup_packet`` must be valid too - if urb is a control request):: - - struct urb *usb_buffer_map (struct urb *urb); - - void usb_buffer_dmasync (struct urb *urb); - - void usb_buffer_unmap (struct urb *urb); - - The calls manage ``urb->transfer_dma`` for you, and set - ``URB_NO_TRANSFER_DMA_MAP`` so that usbcore won't map or unmap the buffer. - They cannot be used for setup_packet buffers in control requests. - -Note that several of those interfaces are currently commented out, since -they don't have current users. See the source code. Other than the dmasync -calls (where the underlying DMA primitives have changed), most of them can -easily be commented back in if you want to use them. + When the USB controller doesn't support DMA, the ``usb_sg_init()`` would try + to submit URBs in PIO way as long as the page in scatterlists is not in the + Highmem, which could be very rare in modern architectures. 
diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt index 0a1f5bb7ee..937840080d 100644 --- a/Documentation/features/core/cBPF-JIT/arch-support.txt +++ b/Documentation/features/core/cBPF-JIT/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt index 6c0f3d759e..7434b43c2f 100644 --- a/Documentation/features/core/eBPF-JIT/arch-support.txt +++ b/Documentation/features/core/eBPF-JIT/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt index 0b94099cf6..0735cb5367 100644 --- a/Documentation/features/core/generic-idle-thread/arch-support.txt +++ b/Documentation/features/core/generic-idle-thread/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | - | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt index 94d9dece58..ccada81556 100644 --- a/Documentation/features/core/jump-labels/arch-support.txt +++ b/Documentation/features/core/jump-labels/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/thread-info-in-task/arch-support.txt b/Documentation/features/core/thread-info-in-task/arch-support.txt index 97c65ed2ac..2afeb6bf6e 100644 --- 
a/Documentation/features/core/thread-info-in-task/arch-support.txt +++ b/Documentation/features/core/thread-info-in-task/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt index aed5679da6..a72330e255 100644 --- a/Documentation/features/core/tracehook/arch-support.txt +++ b/Documentation/features/core/tracehook/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | - | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index c4581c2edb..39c6e78c0c 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 9ec5d13f49..bbf029f095 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt index dc4014f7e1..63494bddc2 100644 --- a/Documentation/features/debug/gcov-profile-all/arch-support.txt +++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: 
| TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt index de84cefbcd..4449e1f55c 100644 --- a/Documentation/features/debug/kcov/arch-support.txt +++ b/Documentation/features/debug/kcov/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt index 5e91ec78c8..f287f16ce0 100644 --- a/Documentation/features/debug/kgdb/arch-support.txt +++ b/Documentation/features/debug/kgdb/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | ok | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index 4e205ef703..f45149cfa3 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt index 38a0a54b79..02febc8835 100644 --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | TODO | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt index aad83b5758..1ea27aedd0 100644 --- 
a/Documentation/features/debug/kprobes/arch-support.txt +++ b/Documentation/features/debug/kprobes/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt index 61380010a4..022be42e64 100644 --- a/Documentation/features/debug/kretprobes/arch-support.txt +++ b/Documentation/features/debug/kretprobes/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt index 83a4639a5c..92f5d0f444 100644 --- a/Documentation/features/debug/optprobes/arch-support.txt +++ b/Documentation/features/debug/optprobes/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index 4c64c5d596..de8f43f2e5 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt index 24c8423b0a..0c698003ce 100644 --- a/Documentation/features/debug/uprobes/arch-support.txt +++ b/Documentation/features/debug/uprobes/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | 
| microblaze: | TODO | diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt index 059110a5fa..3e43176758 100644 --- a/Documentation/features/debug/user-ret-profiler/arch-support.txt +++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt index bfe0921a38..3c6ce35d70 100644 --- a/Documentation/features/io/dma-contiguous/arch-support.txt +++ b/Documentation/features/io/dma-contiguous/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt index 68329e96df..2c3a4b91f1 100644 --- a/Documentation/features/locking/cmpxchg-local/arch-support.txt +++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index a36e231670..b6b00469f7 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt index 
5deb845477..b286a5fff2 100644 --- a/Documentation/features/locking/queued-rwlocks/arch-support.txt +++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt index 2d3961bfef..22f2990392 100644 --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt index 641a7d2ff2..713a69fcd6 100644 --- a/Documentation/features/perf/kprobes-event/arch-support.txt +++ b/Documentation/features/perf/kprobes-event/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt index 33866eb242..09431518b0 100644 --- a/Documentation/features/perf/perf-regs/arch-support.txt +++ b/Documentation/features/perf/perf-regs/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt index c8e4c7c650..f9db4dd8ef 100644 --- a/Documentation/features/perf/perf-stackdump/arch-support.txt +++ b/Documentation/features/perf/perf-stackdump/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | 
| csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt index 23260ca449..d96b778b87 100644 --- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt +++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt @@ -35,7 +35,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt index 532cc67cdf..984601c7c4 100644 --- a/Documentation/features/sched/numa-balancing/arch-support.txt +++ b/Documentation/features/sched/numa-balancing/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | .. | | hexagon: | .. | - | ia64: | TODO | | loongarch: | ok | | m68k: | .. | | microblaze: | .. 
| diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt index 3a7237b989..13feb67964 100644 --- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt +++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | ok | | microblaze: | TODO | diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt index 9bffac8001..ccba965e8d 100644 --- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt +++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt index 625160048f..4d4bfac529 100644 --- a/Documentation/features/time/clockevents/arch-support.txt +++ b/Documentation/features/time/clockevents/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index 72bc5bad03..891be9f619 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index ceb036610d..3d10075a8a 100644 
--- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | .. | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt index c063dffd52..21f11d47ef 100644 --- a/Documentation/features/time/virt-cpuacct/arch-support.txt +++ b/Documentation/features/time/virt-cpuacct/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt index 47909c3dd6..57406c0d53 100644 --- a/Documentation/features/vm/ELF-ASLR/arch-support.txt +++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt @@ -13,7 +13,6 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt index 5acd64b97d..5a7508b8c9 100644 --- a/Documentation/features/vm/PG_uncached/arch-support.txt +++ b/Documentation/features/vm/PG_uncached/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | - | ia64: | ok | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt index 9dd7d75d04..b4a5ce1694 100644 --- a/Documentation/features/vm/THP/arch-support.txt +++ b/Documentation/features/vm/THP/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | .. | | hexagon: | .. | - | ia64: | TODO | | loongarch: | ok | | m68k: | .. | | microblaze: | .. 
| diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt index 76208db88f..8fd22073a8 100644 --- a/Documentation/features/vm/TLB/arch-support.txt +++ b/Documentation/features/vm/TLB/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | .. | | microblaze: | .. | diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index 34647d9bdc..2d6de7b045 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index a24149e59d..1638c2cb17 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index d2b22a0694..3f777f8b67 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -12,7 +12,6 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | - | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index f200d78744..cc4626d6ee 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool 
(dump.erofs): - git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git +For more information, please also refer to the documentation site: + +- https://erofs.docs.kernel.org + Bugs and patches are welcome, please kindly help us and send to the following linux-erofs mailing list: @@ -199,7 +203,7 @@ may not. All metadatas can be now observed in two different spaces (views): | | |__________________| 64 bytes - Xattrs, extents, data inline are followed by the corresponding inode with + Xattrs, extents, data inline are placed after the corresponding inode with proper alignment, and they could be optional for different data mappings. _currently_ total 5 data layouts are supported: diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst index bcf8445991..9e38e4c221 100644 --- a/Documentation/filesystems/files.rst +++ b/Documentation/filesystems/files.rst @@ -62,7 +62,7 @@ the fdtable structure - be held. 4. To look up the file structure given an fd, a reader - must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These + must use either lookup_fdget_rcu() or files_lookup_fdget_rcu() APIs. These take care of barrier requirements due to lock-free lookup. An example:: @@ -70,43 +70,22 @@ the fdtable structure - struct file *file; rcu_read_lock(); - file = lookup_fd_rcu(fd); - if (file) { - ... - } - .... + file = lookup_fdget_rcu(fd); rcu_read_unlock(); - -5. Handling of the file structures is special. Since the look-up - of the fd (fget()/fget_light()) are lock-free, it is possible - that look-up may race with the last put() operation on the - file structure. This is avoided using atomic_long_inc_not_zero() - on ->f_count:: - - rcu_read_lock(); - file = files_lookup_fd_rcu(files, fd); if (file) { - if (atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; + ... + fput(file); } - rcu_read_unlock(); .... 
- return file; - - atomic_long_inc_not_zero() detects if refcounts is already zero or - goes to zero during increment. If it does, we fail - fget()/fget_light(). -6. Since both fdtable and file structures can be looked up +5. Since both fdtable and file structures can be looked up lock-free, they must be installed using rcu_assign_pointer() API. If they are looked up lock-free, rcu_dereference() must be used. However it is advisable to use files_fdtable() - and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues. + and lookup_fdget_rcu()/files_lookup_fdget_rcu() which take care of these + issues. -7. While updating, the fdtable pointer must be looked up while +6. While updating, the fdtable pointer must be looked up while holding files->file_lock. If ->file_lock is dropped, then another thread expand the files thereby creating a new fdtable and making the earlier fdtable pointer stale. @@ -126,3 +105,19 @@ the fdtable structure - Since locate_fd() can drop ->file_lock (and reacquire ->file_lock), the fdtable pointer (fdt) must be loaded after locate_fd(). +On newer kernels rcu based file lookup has been switched to rely on +SLAB_TYPESAFE_BY_RCU instead of call_rcu(). It isn't sufficient anymore +to just acquire a reference to the file in question under rcu using +atomic_long_inc_not_zero() since the file might have already been +recycled and someone else might have bumped the reference. In other +words, callers might see reference count bumps from newer users. For +this reason it is necessary to verify that the pointer is the same +before and after the reference count increment. This pattern can be seen +in get_file_rcu() and __files_get_rcu(). + +In addition, it isn't possible to access or check fields in struct file +without first acquiring a reference on it under rcu lookup. Not doing +that was always very dodgy and it was only usable for non-pointer data +in struct file.
With SLAB_TYPESAFE_BY_RCU it is necessary that callers +either first acquire a reference or they must hold the files_lock of the +fdtable. diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index a624e92f26..1b84f818e5 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -261,9 +261,9 @@ DIRECT_KEY policies The Adiantum encryption mode (see `Encryption modes and usage`_) is suitable for both contents and filenames encryption, and it accepts -long IVs --- long enough to hold both an 8-byte logical block number -and a 16-byte per-file nonce. Also, the overhead of each Adiantum key -is greater than that of an AES-256-XTS key. +long IVs --- long enough to hold both an 8-byte data unit index and a +16-byte per-file nonce. Also, the overhead of each Adiantum key is +greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. When the user has enabled @@ -300,8 +300,8 @@ IV_INO_LBLK_32 policies IV_INO_LBLK_32 policies work like IV_INO_LBLK_64, except that for IV_INO_LBLK_32, the inode number is hashed with SipHash-2-4 (where the -SipHash key is derived from the master key) and added to the file -logical block number mod 2^32 to produce a 32-bit IV. +SipHash key is derived from the master key) and added to the file data +unit index mod 2^32 to produce a 32-bit IV. This format is optimized for use with inline encryption hardware compliant with the eMMC v5.2 standard, which supports only 32 IV bits @@ -451,31 +451,62 @@ acceleration is recommended: Contents encryption ------------------- -For file contents, each filesystem block is encrypted independently. -Starting from Linux kernel 5.5, encryption of filesystems with block -size less than system's page size is supported. 
- -Each block's IV is set to the logical block number within the file as -a little endian number, except that: - -- With CBC mode encryption, ESSIV is also used. Specifically, each IV - is encrypted with AES-256 where the AES-256 key is the SHA-256 hash - of the file's data encryption key. - -- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV. - Currently this is only allowed with the Adiantum encryption mode. - -- With `IV_INO_LBLK_64 policies`_, the logical block number is limited - to 32 bits and is placed in bits 0-31 of the IV. The inode number - (which is also limited to 32 bits) is placed in bits 32-63. - -- With `IV_INO_LBLK_32 policies`_, the logical block number is limited - to 32 bits and is placed in bits 0-31 of the IV. The inode number - is then hashed and added mod 2^32. - -Note that because file logical block numbers are included in the IVs, -filesystems must enforce that blocks are never shifted around within -encrypted files, e.g. via "collapse range" or "insert range". +For contents encryption, each file's contents is divided into "data +units". Each data unit is encrypted independently. The IV for each +data unit incorporates the zero-based index of the data unit within +the file. This ensures that each data unit within a file is encrypted +differently, which is essential to prevent leaking information. + +Note: the encryption depending on the offset into the file means that +operations like "collapse range" and "insert range" that rearrange the +extent mapping of files are not supported on encrypted files. + +There are two cases for the sizes of the data units: + +* Fixed-size data units. This is how all filesystems other than UBIFS + work. A file's data units are all the same size; the last data unit + is zero-padded if needed. By default, the data unit size is equal + to the filesystem block size. 
On some filesystems, users can select + a sub-block data unit size via the ``log2_data_unit_size`` field of + the encryption policy; see `FS_IOC_SET_ENCRYPTION_POLICY`_. + +* Variable-size data units. This is what UBIFS does. Each "UBIFS + data node" is treated as a crypto data unit. Each contains variable + length, possibly compressed data, zero-padded to the next 16-byte + boundary. Users cannot select a sub-block data unit size on UBIFS. + +In the case of compression + encryption, the compressed data is +encrypted. UBIFS compression works as described above. f2fs +compression works a bit differently; it compresses a number of +filesystem blocks into a smaller number of filesystem blocks. +Therefore a f2fs-compressed file still uses fixed-size data units, and +it is encrypted in a similar way to a file containing holes. + +As mentioned in `Key hierarchy`_, the default encryption setting uses +per-file keys. In this case, the IV for each data unit is simply the +index of the data unit in the file. However, users can select an +encryption setting that does not use per-file keys. For these, some +kind of file identifier is incorporated into the IVs as follows: + +- With `DIRECT_KEY policies`_, the data unit index is placed in bits + 0-63 of the IV, and the file's nonce is placed in bits 64-191. + +- With `IV_INO_LBLK_64 policies`_, the data unit index is placed in + bits 0-31 of the IV, and the file's inode number is placed in bits + 32-63. This setting is only allowed when data unit indices and + inode numbers fit in 32 bits. + +- With `IV_INO_LBLK_32 policies`_, the file's inode number is hashed + and added to the data unit index. The resulting value is truncated + to 32 bits and placed in bits 0-31 of the IV. This setting is only + allowed when data unit indices and inode numbers fit in 32 bits. + +The byte order of the IV is always little endian. + +If the user selects FSCRYPT_MODE_AES_128_CBC for the contents mode, an +ESSIV layer is automatically included. 
In this case, before the IV is +passed to AES-128-CBC, it is encrypted with AES-256 where the AES-256 +key is the SHA-256 hash of the file's contents encryption key. Filenames encryption -------------------- @@ -544,7 +575,8 @@ follows:: __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; @@ -586,6 +618,29 @@ This structure must be initialized as follows: The DIRECT_KEY, IV_INO_LBLK_64, and IV_INO_LBLK_32 flags are mutually exclusive. +- ``log2_data_unit_size`` is the log2 of the data unit size in bytes, + or 0 to select the default data unit size. The data unit size is + the granularity of file contents encryption. For example, setting + ``log2_data_unit_size`` to 12 causes file contents to be passed to the + underlying encryption algorithm (such as AES-256-XTS) in 4096-byte + data units, each with its own IV. + + Not all filesystems support setting ``log2_data_unit_size``. ext4 + and f2fs support it since Linux v6.7. On filesystems that support + it, the supported nonzero values are 9 through the log2 of the + filesystem block size, inclusive. The default value of 0 selects + the filesystem block size. + + The main use case for ``log2_data_unit_size`` is for selecting a + data unit size smaller than the filesystem block size for + compatibility with inline encryption hardware that only supports + smaller data unit sizes. ``/sys/block/$disk/queue/crypto/`` may be + useful for checking which data unit sizes are supported by a + particular system's inline encryption hardware. + + Leave this field zeroed unless you are certain you need it. Using + an unnecessarily small data unit size reduces performance. + - For v2 encryption policies, ``__reserved`` must be zeroed.
- For v1 encryption policies, ``master_key_descriptor`` specifies how @@ -1079,8 +1134,8 @@ The caller must zero all input fields, then fill in ``key_spec``: On success, 0 is returned and the kernel fills in the output fields: - ``status`` indicates whether the key is absent, present, or - incompletely removed. Incompletely removed means that the master - secret has been removed, but some files are still in use; i.e., + incompletely removed. Incompletely removed means that removal has + been initiated, but some files are still in use; i.e., `FS_IOC_REMOVE_ENCRYPTION_KEY`_ returned 0 but set the informational status flag FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY. diff --git a/Documentation/filesystems/fuse-io.rst b/Documentation/filesystems/fuse-io.rst index 255a368fe5..6464de4266 100644 --- a/Documentation/filesystems/fuse-io.rst +++ b/Documentation/filesystems/fuse-io.rst @@ -15,7 +15,8 @@ The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the FUSE_OPEN reply. In direct-io mode the page cache is completely bypassed for reads and writes. -No read-ahead takes place. Shared mmap is disabled. +No read-ahead takes place. Shared mmap is disabled by default. To allow shared +mmap, the FUSE_DIRECT_IO_ALLOW_MMAP flag may be enabled in the FUSE_INIT reply. In cached mode reads may be satisfied from the page cache, and data may be read-ahead by the kernel to fill the cache. The cache is always kept consistent diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 4b30daee39..f04ce1215a 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -122,12 +122,9 @@ are exportable by setting the s_export_op field in the struct super_block. 
This field must point to a "struct export_operations" struct which has the following members: - encode_fh (optional) + encode_fh (mandatory) Takes a dentry and creates a filehandle fragment which may later be used - to find or create a dentry for the same object. The default - implementation creates a filehandle fragment that encodes a 32bit inode - and generation number for the inode encoded, and if necessary the - same information for the parent. + to find or create a dentry for the same object. fh_to_dentry (mandatory) Given a filehandle fragment, this should find the implied object and @@ -241,3 +238,10 @@ following flags are defined: all of an inode's dirty data on last close. Exports that behave this way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip waiting for writeback when closing such files. + + EXPORT_OP_ASYNC_LOCK - Indicates that the filesystem is capable of handling async lock + requests from lockd. Only set EXPORT_OP_ASYNC_LOCK if the filesystem has + its own ->lock() functionality as the core posix_lock_file() implementation + has no async lock request handling yet. For more information about how to + indicate an async lock request from a ->lock() file_operations struct, see + fs/locks.c and the comment for the function vfs_lock_file(). diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 5b93268e40..b28e5e3c23 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs to record in the upper filesystem that files have been removed. This is done using whiteouts and opaque directories (non-directories are always opaque). -A whiteout is created as a character device with 0/0 device number. +A whiteout is created as a character device with 0/0 device number or +as a zero-size regular file with the xattr "trusted.overlay.whiteout".
+ When a whiteout is found in the upper level of a merged directory, any matching name in the lower level is ignored, and the whiteout itself is also hidden. @@ -154,6 +156,13 @@ A directory is made opaque by setting the xattr "trusted.overlay.opaque" to "y". Where the upper filesystem contains an opaque directory, any directory in the lower filesystem with the same name is ignored. +An opaque directory should not contain any whiteouts, because they do not +serve any purpose. A merge directory containing regular files with the xattr +"trusted.overlay.whiteout" should be additionally marked by setting the xattr +"trusted.overlay.opaque" to "x" on the merge directory itself. +This is needed to avoid the overhead of checking the "trusted.overlay.whiteout" +on all entries during readdir in the common case. + readdir ------- @@ -344,10 +353,11 @@ escaping the colons with a single backslash. For example: mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged -Since kernel version v6.5, directory names containing colons can also -be provided as lower layer using the fsconfig syscall from new mount api: +Since kernel version v6.8, directory names containing colons can also +be configured as lower layer using the "lowerdir+" mount option and the +fsconfig syscall from new mount api. For example: - fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0); In the latter case, colons in lower layer directory names will be escaped as an octal characters (\072) when displayed in /proc/self/mountinfo. @@ -416,6 +426,16 @@ Only the data of the files in the "data-only" lower layers may be visible when a "metacopy" file in one of the lower layers above it, has a "redirect" to the absolute path of the "lower data" file in the "data-only" lower layer.
+Since kernel version v6.8, "data-only" lower layers can also be added using +the "datadir+" mount option and the fsconfig syscall from new mount api. +For example: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0); + fs-verity support ---------------------- @@ -504,6 +524,30 @@ directory tree on the same or different underlying filesystem, and even to a different machine. With the "inodes index" feature, trying to mount the copied layers will fail the verification of the lower root file handle. +Nesting overlayfs mounts +------------------------ + +It is possible to use a lower directory that is stored on an overlayfs +mount. For regular files this does not need any special care. However, files +that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs, will be +interpreted by the underlying overlayfs mount and stripped out. In order to +allow the second overlayfs mount to see the attributes they must be escaped. + +Overlayfs specific xattrs are escaped by using a special prefix of +"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr +in the lower dir will be exposed as a regular file with a +"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by +repeating the prefix multiple times, as each instance only removes one prefix. + +A lower dir with a regular whiteout will always be handled by the overlayfs +mount, so to support storing an effective whiteout file in an overlayfs mount an +alternative form of whiteout is supported. This form is a regular, zero-size +file with the "overlay.whiteout" xattr set, inside a directory with the +"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_).
+These alternative whiteouts are never created by overlayfs, but can be used by +userspace tools (like containers) that generate lower layers. +These alternative whiteouts can be escaped using the standard xattr escape +mechanism in order to properly nest to any depth. Non-standard behavior --------------------- diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 41d964b48e..9100969e7d 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1050,6 +1050,22 @@ kill_anon_super(), or kill_block_super() helpers. **mandatory** +Lock ordering has been changed so that s_umount ranks above open_mutex again. +All places where s_umount was taken under open_mutex have been fixed up. + +--- + +**mandatory** + +export_operations ->encode_fh() no longer has a default implementation to +encode FILEID_INO32_GEN* file handles. +Filesystems that used the default implementation may use the generic helper +generic_encode_ino32_fh() explicitly. + +--- + +**mandatory** + If ->rename() update of .. on cross-directory move needs an exclusion with directory modifications, do *not* lock the subdirectory in question in your ->rename() - it's done by the caller now [that item should've been added in diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2b59cff8be..49ef12df63 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -689,9 +689,15 @@ files are there, and which are missing. File Content ============ =============================================================== apm Advanced power management info + bootconfig Kernel command line obtained from boot config, + and, if there were kernel parameters from the + boot loader, a "# Parameters from bootloader:" + line followed by a line containing those + parameters prefixed by "# ". 
(5.5) buddyinfo Kernel memory allocator information (see text) (2.5) bus Directory containing bus specific information - cmdline Kernel command line + cmdline Kernel command line, both from bootloader and embedded + in the kernel image cpuinfo Info about the CPU devices Available devices (block and character) dma Used DMS channels diff --git a/Documentation/filesystems/xfs-online-fsck-design.rst b/Documentation/filesystems/xfs-online-fsck-design.rst index 1625d11310..a0678101a7 100644 --- a/Documentation/filesystems/xfs-online-fsck-design.rst +++ b/Documentation/filesystems/xfs-online-fsck-design.rst @@ -1585,7 +1585,7 @@ The transaction sequence looks like this: 2. The second transaction contains a physical update to the free space btrees of AG 3 to release the former BMBT block and a second physical update to the free space btrees of AG 7 to release the unmapped file space. - Observe that the the physical updates are resequenced in the correct order + Observe that the physical updates are resequenced in the correct order when possible. Attached to the transaction is a an extent free done (EFD) log item. The EFD contains a pointer to the EFI logged in transaction #1 so that log diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst index 56d9913a33..d79f693909 100644 --- a/Documentation/firmware-guide/acpi/enumeration.rst +++ b/Documentation/firmware-guide/acpi/enumeration.rst @@ -64,6 +64,49 @@ If the driver needs to perform more complex initialization like getting and configuring GPIOs it can get its ACPI handle and extract this information from ACPI tables. 
+ACPI device objects +=================== + +Generally speaking, there are two categories of devices in a system in which +ACPI is used as an interface between the platform firmware and the OS: Devices +that can be discovered and enumerated natively, through a protocol defined for +the specific bus that they are on (for example, configuration space in PCI), +without the platform firmware assistance, and devices that need to be described +by the platform firmware so that they can be discovered. Still, for any device +known to the platform firmware, regardless of which category it falls into, +there can be a corresponding ACPI device object in the ACPI Namespace in which +case the Linux kernel will create a struct acpi_device object based on it for +that device. + +Those struct acpi_device objects are never used for binding drivers to natively +discoverable devices, because they are represented by other types of device +objects (for example, struct pci_dev for PCI devices) that are bound to by +device drivers (the corresponding struct acpi_device object is then used as +an additional source of information on the configuration of the given device). +Moreover, the core ACPI device enumeration code creates struct platform_device +objects for the majority of devices that are discovered and enumerated with the +help of the platform firmware and those platform device objects can be bound to +by platform drivers in direct analogy with the natively enumerable devices +case. Therefore it is logically inconsistent and so generally invalid to bind +drivers to struct acpi_device objects, including drivers for devices that are +discovered with the help of the platform firmware. + +Historically, ACPI drivers that bound directly to struct acpi_device objects +were implemented for some devices enumerated with the help of the platform +firmware, but this is not recommended for any new drivers. 
As explained above, +platform device objects are created for those devices as a rule (with a few +exceptions that are not relevant here) and so platform drivers should be used +for handling them, even though the corresponding ACPI device objects are the +only source of device configuration information in that case. + +For every device having a corresponding struct acpi_device object, the pointer +to it is returned by the ACPI_COMPANION() macro, so it is always possible to +get to the device configuration information stored in the ACPI device object +this way. Accordingly, struct acpi_device can be regarded as a part of the +interface between the kernel and the ACPI Namespace, whereas device objects of +other types (for example, struct pci_dev or struct platform_device) are used +for interacting with the rest of the system. + DMA support =========== diff --git a/Documentation/gpu/amdgpu/driver-misc.rst b/Documentation/gpu/amdgpu/driver-misc.rst index 4321c38fef..e40e15f89f 100644 --- a/Documentation/gpu/amdgpu/driver-misc.rst +++ b/Documentation/gpu/amdgpu/driver-misc.rst @@ -26,12 +26,30 @@ serial_number .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c :doc: serial_number +fru_id +------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c + :doc: fru_id + +manufacturer +------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c + :doc: manufacturer + unique_id --------- .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: unique_id +board_info +---------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c + :doc: board_info + Accelerated Processing Units (APU) Info --------------------------------------- diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 5e27e4eb39..2f6166f81e 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -64,6 +64,36 @@ gpu_metrics .. 
kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: gpu_metrics +fan_curve +--------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_curve + +acoustic_limit_rpm_threshold +---------------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: acoustic_limit_rpm_threshold + +acoustic_target_rpm_threshold +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: acoustic_target_rpm_threshold + +fan_target_temperature +---------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_target_temperature + +fan_minimum_pwm +--------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_minimum_pwm + GFXOFF ====== diff --git a/Documentation/gpu/automated_testing.rst b/Documentation/gpu/automated_testing.rst index 469b6fb65c..240e29d5ba 100644 --- a/Documentation/gpu/automated_testing.rst +++ b/Documentation/gpu/automated_testing.rst @@ -67,6 +67,19 @@ Lists the tests that for a given driver on a specific hardware revision are known to behave unreliably. These tests won't cause a job to fail regardless of the result. They will still be run. +Each new flake entry must be associated with a link to the email reporting the +bug to the author of the affected driver, the board name or Device Tree name of +the board, the first kernel version affected, and an approximation of the +failure rate. + +They should be provided under the following format:: + + # Bug Report: $LORE_OR_PATCHWORK_URL + # Board Name: broken-board.dtb + # Version: 6.6-rc1 + # Failure Rate: 100 + flaky-test + drivers/gpu/drm/ci/${DRIVER_NAME}-${HW_REVISION}-skips.txt ----------------------------------------------------------- @@ -86,10 +99,13 @@ https://gitlab.freedesktop.org/janedoe/linux/-/settings/ci_cd), change the CI/CD configuration file from .gitlab-ci.yml to drivers/gpu/drm/ci/gitlab-ci.yml. -3. Next time you push to this repository, you will see a CI pipeline being +3. 
Request to be added to the drm/ci-ok group so that your user has the +necessary privileges to run the CI on https://gitlab.freedesktop.org/drm/ci-ok + +4. Next time you push to this repository, you will see a CI pipeline being created (eg. https://gitlab.freedesktop.org/janedoe/linux/-/pipelines) -4. The various jobs will be run and when the pipeline is finished, all jobs +5. The various jobs will be run and when the pipeline is finished, all jobs should be green unless a regression has been found. diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index 3a52f48215..45a12e5520 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -18,6 +18,7 @@ GPU Driver Documentation xen-front afbc komeda-kms + panfrost .. only:: subproject and html diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 690d2ffe72..a98a7e04e8 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -360,6 +360,8 @@ Format Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_fourcc.c :export: +.. _kms_dumb_buffer_objects: + Dumb Buffer Objects =================== diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index c19b34b1c0..602010cb68 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -466,40 +466,40 @@ DRM MM Range Allocator Function References .. kernel-doc:: drivers/gpu/drm/drm_mm.c :export: -DRM GPU VA Manager -================== +DRM GPUVM +========= Overview -------- -.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c +.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :doc: Overview Split and Merge --------------- -.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c +.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :doc: Split and Merge Locking ------- -.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c +.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :doc: Locking Examples -------- -.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c +.. 
kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :doc: Examples -DRM GPU VA Manager Function References --------------------------------------- +DRM GPUVM Function References +----------------------------- -.. kernel-doc:: include/drm/drm_gpuva_mgr.h +.. kernel-doc:: include/drm/drm_gpuvm.h :internal: -.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c +.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c :export: DRM Buddy Allocator diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 65fb3036a5..370d820be2 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -285,6 +285,83 @@ for GPU1 and GPU2 from different vendors, and a third handler for mmapped regular files. Threads cause additional pain with signal handling as well. +Device reset +============ + +The GPU stack is really complex and is prone to errors, from hardware bugs, +faulty applications and everything in between the many layers. Some errors +require resetting the device in order to make the device usable again. This +section describes the expectations for DRM and usermode drivers when a +device resets and how to propagate the reset status. + +Device resets can not be disabled without tainting the kernel, which can lead to +hanging the entire kernel through shrinkers/mmu_notifiers. The userspace role in +device resets is to propagate the message to the application and apply any +special policy for blocking guilty applications, if any. A corollary is that +debugging a hung GPU context requires hardware support to be able to preempt such +a GPU context while it's stopped. + +Kernel Mode Driver +------------------ + +The KMD is responsible for checking if the device needs a reset, and to perform +it as needed. Usually a hang is detected when a job gets stuck executing. KMD +should keep track of resets, because userspace can query any time about the +reset status for a specific context. This is needed to propagate to the rest of +the stack that a reset has happened.
Currently, this is implemented by each +driver separately, with no common DRM interface. Ideally this should be properly +integrated into the DRM scheduler to provide a common ground for all drivers. After a +reset, KMD should reject new command submissions for affected contexts. + +User Mode Driver +---------------- + +After command submission, UMD should check if the submission was accepted or +rejected. After a reset, KMD should reject submissions, and UMD can issue an +ioctl to the KMD to check the reset status, and this can be checked more often +if the UMD requires it. After detecting a reset, UMD will then proceed to report +it to the application using the appropriate API error code, as explained in the +section below about robustness. + +Robustness +---------- + +The only way to try to keep a graphical API context working after a reset is if +it complies with the robustness aspects of the graphical API that it is using. + +Graphical APIs provide ways for applications to deal with device resets. However, +there is no guarantee that the app will use such features correctly, and a +userspace that doesn't support robust interfaces (like a non-robust +OpenGL context or API without any robustness support like libva) leaves the +robustness handling entirely to the userspace driver. There is no strong +community consensus on what the userspace driver should do in that case, +since all reasonable approaches have some clear downsides. + +OpenGL +~~~~~~ + +Apps using OpenGL should use the available robust interfaces, like the +extension ``GL_ARB_robustness`` (or ``GL_EXT_robustness`` for OpenGL ES). This +interface tells if a reset has happened, and if so, all the context state is +considered lost and the app proceeds by creating new ones. There's no consensus +on what to do if robustness is not in use. + +Vulkan +~~~~~~ + +Apps using Vulkan should check for ``VK_ERROR_DEVICE_LOST`` for submissions.
+This error code means, among other things, that a device reset has happened and +it needs to recreate the contexts to keep going. + +Reporting causes of resets +-------------------------- + +Apart from propagating the reset through the stack so apps can recover, it's +really useful for driver developers to learn more about what caused the reset in +the first place. DRM devices should make use of devcoredump to store relevant +information about the reset, so this information can be added to user bug +reports. + .. _drm_driver_ioctl: IOCTL Support on Device Nodes @@ -450,12 +527,12 @@ VBlank event handling The DRM core exposes two vertical blank related ioctls: -DRM_IOCTL_WAIT_VBLANK +:c:macro:`DRM_IOCTL_WAIT_VBLANK` This takes a struct drm_wait_vblank structure as its argument, and it is used to block or request a signal when a specified vblank event occurs. -DRM_IOCTL_MODESET_CTL +:c:macro:`DRM_IOCTL_MODESET_CTL` This was only used for user-mode-settind drivers around modesetting changes to allow the kernel to update the vblank interrupt after mode setting, since on many devices the vertical blank counter is @@ -478,11 +555,18 @@ The index is used in cases where a densely packed identifier for a CRTC is needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct drm_mode_get_plane is an example. -DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's, -and the CRTC index is its position in this array. +:c:macro:`DRM_IOCTL_MODE_GETRESOURCES` populates a structure with an array of +CRTC ID's, and the CRTC index is its position in this array. .. kernel-doc:: include/uapi/drm/drm.h :internal: .. kernel-doc:: include/uapi/drm/drm_mode.h :internal: + + +dma-buf interoperability +======================== + +Please see Documentation/userspace-api/dma-buf-alloc-exchange.rst for +information on how dma-buf is integrated and exposed within DRM. 
diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 044e6b2ed1..7aca5c7a7b 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -169,3 +169,4 @@ Driver specific implementations ------------------------------- :ref:`i915-usage-stats` +:ref:`panfrost-usage-stats` diff --git a/Documentation/gpu/drm-vm-bind-async.rst b/Documentation/gpu/drm-vm-bind-async.rst new file mode 100644 index 0000000000..3d709d0209 --- /dev/null +++ b/Documentation/gpu/drm-vm-bind-async.rst @@ -0,0 +1,309 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +==================== +Asynchronous VM_BIND +==================== + +Nomenclature: +============= + +* ``VRAM``: On-device memory. Sometimes referred to as device local memory. + +* ``gpu_vm``: A virtual GPU address space. Typically per process, but + can be shared by multiple processes. + +* ``VM_BIND``: An operation or a list of operations to modify a gpu_vm using + an IOCTL. The operations include mapping and unmapping system- or + VRAM memory. + +* ``syncobj``: A container that abstracts synchronization objects. The + synchronization objects can be either generic, like dma-fences or + driver specific. A syncobj typically indicates the type of the + underlying synchronization object. + +* ``in-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation waits + for these before starting. + +* ``out-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation + signals these when the bind operation is complete. + +* ``dma-fence``: A cross-driver synchronization object. A basic + understanding of dma-fences is required to digest this + document. Please refer to the ``DMA Fences`` section of the + :doc:`dma-buf doc </driver-api/dma-buf>`. + +* ``memory fence``: A synchronization object, different from a dma-fence. + A memory fence uses the value of a specified memory location to determine + signaled status.
A memory fence can be awaited and signaled by both + the GPU and CPU. Memory fences are sometimes referred to as + user-fences, userspace-fences or gpu futexes and do not necessarily obey + the dma-fence rule of signaling within a "reasonable amount of time". + The kernel should thus avoid waiting for memory fences with locks held. + +* ``long-running workload``: A workload that may take more than the + current stipulated dma-fence maximum signal delay to complete and + which therefore needs to set the gpu_vm or the GPU execution context in + a certain mode that disallows completion dma-fences. + +* ``exec function``: An exec function is a function that revalidates all + affected gpu_vmas, submits a GPU command batch and registers the + dma_fence representing the GPU command's activity with all affected + dma_resvs. For completeness, although not covered by this document, + it's worth mentioning that an exec function may also be the + revalidation worker that is used by some drivers in compute / + long-running mode. + +* ``bind context``: A context identifier used for the VM_BIND + operation. VM_BIND operations that use the same bind context can be + assumed, where it matters, to complete in order of submission. No such + assumptions can be made for VM_BIND operations using separate bind contexts. + +* ``UMD``: User-mode driver. + +* ``KMD``: Kernel-mode driver. + + +Synchronous / Asynchronous VM_BIND operation +============================================ + +Synchronous VM_BIND +___________________ +With Synchronous VM_BIND, the VM_BIND operations all complete before the +IOCTL returns. A synchronous VM_BIND takes neither in-fences nor +out-fences. Synchronous VM_BIND may block and wait for GPU operations; +for example swap-in or clearing, or even previous binds. + +Asynchronous VM_BIND +____________________ +Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. 
While the +IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs +before modifying the GPU page-tables, and signal the out-syncobjs when +the modification is done in the sense that the next exec function that +awaits for the out-syncobjs will see the change. Errors are reported +synchronously. +In low-memory situations the implementation may block, performing the +VM_BIND synchronously, because there might not be enough memory +immediately available for preparing the asynchronous operation. + +If the VM_BIND IOCTL takes a list or an array of operations as an argument, +the in-syncobjs needs to signal before the first operation starts to +execute, and the out-syncobjs signal after the last operation +completes. Operations in the operation list can be assumed, where it +matters, to complete in order. + +Since asynchronous VM_BIND operations may use dma-fences embedded in +out-syncobjs and internally in KMD to signal bind completion, any +memory fences given as VM_BIND in-fences need to be awaited +synchronously before the VM_BIND ioctl returns, since dma-fences, +required to signal in a reasonable amount of time, can never be made +to depend on memory fences that don't have such a restriction. + +The purpose of an Asynchronous VM_BIND operation is for user-mode +drivers to be able to pipeline interleaved gpu_vm modifications and +exec functions. For long-running workloads, such pipelining of a bind +operation is not allowed and any in-fences need to be awaited +synchronously. The reason for this is twofold. First, any memory +fences gated by a long-running workload and used as in-syncobjs for the +VM_BIND operation will need to be awaited synchronously anyway (see +above). 
Second, any dma-fences used as in-syncobjs for VM_BIND +operations for long-running workloads will not allow for pipelining +anyway since long-running workloads don't allow for dma-fences as +out-syncobjs, so while theoretically possible the use of them is +questionable and should be rejected until there is a valuable use-case. +Note that this is not a limitation imposed by dma-fence rules, but +rather a limitation imposed to keep KMD implementation simple. It does +not affect using dma-fences as dependencies for the long-running +workload itself, which is allowed by dma-fence rules, but rather for +the VM_BIND operation only. + +An asynchronous VM_BIND operation may take substantial time to +complete and signal the out_fence. In particular if the operation is +deeply pipelined behind other VM_BIND operations and workloads +submitted using exec functions. In that case, UMD might want to avoid a +subsequent VM_BIND operation to be queued behind the first one if +there are no explicit dependencies. In order to circumvent such a queue-up, a +VM_BIND implementation may allow for VM_BIND contexts to be +created. For each context, VM_BIND operations will be guaranteed to +complete in the order they were submitted, but that is not the case +for VM_BIND operations executing on separate VM_BIND contexts. Instead +KMD will attempt to execute such VM_BIND operations in parallel but +leaving no guarantee that they will actually be executed in +parallel. There may be internal implicit dependencies that only KMD knows +about, for example page-table structure changes. A way to attempt +to avoid such internal dependencies is to have different VM_BIND +contexts use separate regions of a VM. + +Also for VM_BINDS for long-running gpu_vms the user-mode driver should typically +select memory fences as out-fences since that gives greater flexibility for +the kernel mode driver to inject other operations into the bind / +unbind operations. 
Like for example inserting breakpoints into batch +buffers. The workload execution can then easily be pipelined behind +the bind completion using the memory out-fence as the signal condition +for a GPU semaphore embedded by UMD in the workload. + +There is no difference in the operations supported or in +multi-operation support between asynchronous VM_BIND and synchronous VM_BIND. + +Multi-operation VM_BIND IOCTL error handling and interrupts +=========================================================== + +The VM_BIND operations of the IOCTL may error for various reasons, for +example due to lack of resources to complete and due to interrupted +waits. +In these situations UMD should preferably restart the IOCTL after +taking suitable action. +If UMD has over-committed a memory resource, an -ENOSPC error will be +returned, and UMD may then unbind resources that are not used at the +moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the +IOCTL and on -ENOMEM user-space may either attempt to free known +system memory resources or fail. In case of UMD deciding to fail a +bind operation, due to an error return, no additional action is needed +to clean up the failed operation, and the VM is left in the same state +as it was before the failing IOCTL. +Unbind operations are guaranteed not to return any errors due to +resource constraints, but may return errors due to, for example, +invalid arguments or the gpu_vm being banned. +In the case an unexpected error happens during the asynchronous bind +process, the gpu_vm will be banned, and attempts to use it after banning +will return -ENOENT. + +Example: The Xe VM_BIND uAPI +============================ + +Starting with the VM_BIND operation struct, the IOCTL call can take +zero, one or many such operations. A zero number means only the +synchronization part of the IOCTL is carried out: an asynchronous +VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the +implicit dependencies to be fulfilled. 
+ +.. code-block:: c + + struct drm_xe_vm_bind_op { + /** + * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP + */ + __u32 obj; + + /** @pad: MBZ */ + __u32 pad; + + union { + /** + * @obj_offset: Offset into the object for MAP. + */ + __u64 obj_offset; + + /** @userptr: user virtual address for MAP_USERPTR */ + __u64 userptr; + }; + + /** + * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL + */ + __u64 range; + + /** @addr: Address to operate on, MBZ for UNMAP_ALL */ + __u64 addr; + + /** + * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles, + * only applies to creating new VMAs + */ + __u64 tile_mask; + + /* Map (parts of) an object into the GPU virtual address range. */ + #define XE_VM_BIND_OP_MAP 0x0 + /* Unmap a GPU virtual address range */ + #define XE_VM_BIND_OP_UNMAP 0x1 + /* + * Map a CPU virtual address range into a GPU virtual + * address range. + */ + #define XE_VM_BIND_OP_MAP_USERPTR 0x2 + /* Unmap a gem object from the VM. */ + #define XE_VM_BIND_OP_UNMAP_ALL 0x3 + /* + * Make the backing memory of an address range resident if + * possible. Note that this doesn't pin backing memory. + */ + #define XE_VM_BIND_OP_PREFETCH 0x4 + + /* Make the GPU map readonly. */ + #define XE_VM_BIND_FLAG_READONLY (0x1 << 16) + /* + * Valid on a faulting VM only, do the MAP operation immediately rather + * than deferring the MAP to the page fault handler. + */ + #define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 17) + /* + * When the NULL flag is set, the page tables are setup with a special + * bit which indicates writes are dropped and all reads return zero. In + * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP + * operations, the BO handle MBZ, and the BO offset MBZ. This flag is + * intended to implement VK sparse bindings.
+ */ + #define XE_VM_BIND_FLAG_NULL (0x1 << 18) + /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ + __u32 op; + + /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ + __u32 region; + + /** @reserved: Reserved */ + __u64 reserved[2]; + }; + + +The VM_BIND IOCTL argument itself, looks like follows. Note that for +synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here +the ``exec_queue_id`` field is the VM_BIND context discussed previously +that is used to facilitate out-of-order VM_BINDs. + +.. code-block:: c + + struct drm_xe_vm_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** + * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND + * and exec queue must have same vm_id. If zero, the default VM bind engine + * is used. + */ + __u32 exec_queue_id; + + /** @num_binds: number of binds in this IOCTL */ + __u32 num_binds; + + /* If set, perform an async VM_BIND, if clear a sync VM_BIND */ + #define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0) + + /** @flag: Flags controlling all operations in this ioctl. */ + __u32 flags; + + union { + /** @bind: used if num_binds == 1 */ + struct drm_xe_vm_bind_op bind; + + /** + * @vector_of_binds: userptr to array of struct + * drm_xe_vm_bind_op if num_binds > 1 + */ + __u64 vector_of_binds; + }; + + /** @num_syncs: amount of syncs to wait for or to signal on completion. */ + __u32 num_syncs; + + /** @pad2: MBZ */ + __u32 pad2; + + /** @syncs: pointer to struct drm_xe_sync array */ + __u64 syncs; + + /** @reserved: Reserved */ + __u64 reserved[2]; + }; diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 378e825754..0ca1550fd9 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -267,19 +267,22 @@ i915 driver. Intel GPU Basics ---------------- -An Intel GPU has multiple engines. 
There are several engine types. - -- RCS engine is for rendering 3D and performing compute, this is named - `I915_EXEC_RENDER` in user space. -- BCS is a blitting (copy) engine, this is named `I915_EXEC_BLT` in user - space. -- VCS is a video encode and decode engine, this is named `I915_EXEC_BSD` - in user space -- VECS is video enhancement engine, this is named `I915_EXEC_VEBOX` in user - space. -- The enumeration `I915_EXEC_DEFAULT` does not refer to specific engine; - instead it is to be used by user space to specify a default rendering - engine (for 3D) that may or may not be the same as RCS. +An Intel GPU has multiple engines. There are several engine types: + +- Render Command Streamer (RCS). An engine for rendering 3D and + performing compute. +- Blitting Command Streamer (BCS). An engine for performing blitting and/or + copying operations. +- Video Command Streamer. An engine used for video encoding and decoding. Also + sometimes called 'BSD' in hardware documentation. +- Video Enhancement Command Streamer (VECS). An engine for video enhancement. + Also sometimes called 'VEBOX' in hardware documentation. +- Compute Command Streamer (CCS). An engine that has access to the media and + GPGPU pipelines, but not the 3D pipeline. +- Graphics Security Controller (GSCCS). A dedicated engine for internal + communication with GSC controller on security related tasks like + High-bandwidth Digital Content Protection (HDCP), Protected Xe Path (PXP), + and HuC firmware authentication. The Intel GPU family is a family of integrated GPU's using Unified Memory Access. For having the GPU "do work", user space will feed the diff --git a/Documentation/gpu/implementation_guidelines.rst b/Documentation/gpu/implementation_guidelines.rst new file mode 100644 index 0000000000..138e637dcc --- /dev/null +++ b/Documentation/gpu/implementation_guidelines.rst @@ -0,0 +1,9 @@ +.. 
SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +=========================================================== +Misc DRM driver uAPI- and feature implementation guidelines +=========================================================== + +.. toctree:: + + drm-vm-bind-async diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst index e45ff09152..37e383ccf7 100644 --- a/Documentation/gpu/index.rst +++ b/Documentation/gpu/index.rst @@ -18,6 +18,7 @@ GPU Driver Developer's Guide vga-switcheroo vgaarbiter automated_testing + implementation_guidelines todo rfc/index diff --git a/Documentation/gpu/panfrost.rst b/Documentation/gpu/panfrost.rst new file mode 100644 index 0000000000..b80e41f4b2 --- /dev/null +++ b/Documentation/gpu/panfrost.rst @@ -0,0 +1,40 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================= + drm/Panfrost Mali Driver +========================= + +.. _panfrost-usage-stats: + +Panfrost DRM client usage stats implementation +============================================== + +The drm/Panfrost driver implements the DRM client usage stats specification as +documented in :ref:`drm-client-usage-stats`. + +Example of the output showing the implemented key value pairs and entirety of +the currently possible format options: + +:: + pos: 0 + flags: 02400002 + mnt_id: 27 + ino: 531 + drm-driver: panfrost + drm-client-id: 14 + drm-engine-fragment: 1846584880 ns + drm-cycles-fragment: 1424359409 + drm-maxfreq-fragment: 799999987 Hz + drm-curfreq-fragment: 799999987 Hz + drm-engine-vertex-tiler: 71932239 ns + drm-cycles-vertex-tiler: 52617357 + drm-maxfreq-vertex-tiler: 799999987 Hz + drm-curfreq-vertex-tiler: 799999987 Hz + drm-total-memory: 290 MiB + drm-shared-memory: 0 MiB + drm-active-memory: 226 MiB + drm-resident-memory: 36496 KiB + drm-purgeable-memory: 128 KiB + +Possible `drm-engine-` key names are: `fragment`, and `vertex-tiler`. +`drm-curfreq-` values convey the current operating frequency for that engine. 
diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst index 2516fe141d..c29113a0ac 100644 --- a/Documentation/gpu/rfc/xe.rst +++ b/Documentation/gpu/rfc/xe.rst @@ -67,14 +67,8 @@ platforms. When the time comes for Xe, the protection will be lifted on Xe and kept in i915. -Xe driver will be protected with both STAGING Kconfig and force_probe. Changes in -the uAPI are expected while the driver is behind these protections. STAGING will -be removed when the driver uAPI gets to a mature state where we can guarantee the -‘no regression’ rule. Then force_probe will be lifted only for future platforms -that will be productized with Xe driver, but not with i915. - -Xe – Pre-Merge Goals -==================== +Xe – Pre-Merge Goals - Work-in-Progress +======================================= Drm_scheduler ------------- @@ -94,41 +88,6 @@ depend on any other patch touching drm_scheduler itself that was not yet merged through drm-misc. This, by itself, already includes the reach of an agreement for uniform 1 to 1 relationship implementation / usage across drivers. -GPU VA ------- -Two main goals of Xe are meeting together here: - -1) Have an uAPI that aligns with modern UMD needs. - -2) Early upstream engagement. - -RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping -track of GPU virtual address mappings. This is still not merged upstream, but -this aligns very well with our goals and with our VM_BIND. The engagement with -upstream and the port of Xe towards GPUVA is already ongoing. - -As a key measurable result, Xe needs to be aligned with the GPU VA and working in -our tree. Missing Nouveau patches should *not* block Xe and any needed GPUVA -related patch should be independent and present on dri-devel or acked by -maintainers to go along with the first Xe pull request towards drm-next. 
- -DRM_VM_BIND ------------ -Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to -fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the -development of a common new drm_infrastructure. However, the Xe team needs to -engage with the community to explore the options of a common API. - -As a key measurable result, the DRM_VM_BIND needs to be documented in this file -below, or this entire block deleted if the consensus is for independent drivers -vm_bind ioctls. - -Although having a common DRM level IOCTL for VM_BIND is not a requirement to get -Xe merged, it is mandatory to enforce the overall locking scheme for all major -structs and list (so vm and vma). So, a consensus is needed, and possibly some -common helpers. If helpers are needed, they should be also documented in this -document. - ASYNC VM_BIND ------------- Although having a common DRM level IOCTL for VM_BIND is not a requirement to get @@ -138,8 +97,8 @@ memory fences. Ideally with helper support so people don't get it wrong in all possible ways. As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of -various flavors, error handling and a sample API should be documented here or in -a separate document pointed to by this document. +various flavors, error handling and sample API suggestions are documented in +:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`. Userptr integration and vm_bind ------------------------------- @@ -212,6 +171,14 @@ This item ties into the GPUVA, VM_BIND, and even long-running compute support. As a key measurable result, we need to have a community consensus documented in this document and the Xe driver prepared for the changes, if necessary. +Xe – uAPI high level overview +============================= + +...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
+ +Xe – Pre-Merge Goals - Completed +================================ + Dev_coredump ------------ @@ -229,7 +196,37 @@ infrastructure with overall possible improvements, like multiple file support for better organization of the dumps, snapshot support, dmesg extra print, and whatever may make sense and help the overall infrastructure. -Xe – uAPI high level overview -============================= +DRM_VM_BIND +----------- +Nouveau, and Xe are all implementing ‘VM_BIND’ and new ‘Exec’ uAPIs in order to +fulfill the needs of the modern uAPI. Xe merge should *not* be blocked on the +development of a common new drm_infrastructure. However, the Xe team needs to +engage with the community to explore the options of a common API. -...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached. +As a key measurable result, the DRM_VM_BIND needs to be documented in this file +below, or this entire block deleted if the consensus is for independent drivers +vm_bind ioctls. + +Although having a common DRM level IOCTL for VM_BIND is not a requirement to get +Xe merged, it is mandatory to enforce the overall locking scheme for all major +structs and list (so vm and vma). So, a consensus is needed, and possibly some +common helpers. If helpers are needed, they should be also documented in this +document. + +GPU VA +------ +Two main goals of Xe are meeting together here: + +1) Have an uAPI that aligns with modern UMD needs. + +2) Early upstream engagement. + +RedHat engineers working on Nouveau proposed a new DRM feature to handle keeping +track of GPU virtual address mappings. This is still not merged upstream, but +this aligns very well with our goals and with our VM_BIND. The engagement with +upstream and the port of Xe towards GPUVA is already ongoing. + +As a key measurable result, Xe needs to be aligned with the GPU VA and working in +our tree. 
Missing Nouveau patches should *not* block Xe and any needed GPUVA +related patch should be independent and present on dri-devel or acked by +maintainers to go along with the first Xe pull request towards drm-next. diff --git a/Documentation/hwmon/adt7475.rst b/Documentation/hwmon/adt7475.rst index ef3ea1ea9b..f90f769d82 100644 --- a/Documentation/hwmon/adt7475.rst +++ b/Documentation/hwmon/adt7475.rst @@ -90,7 +90,7 @@ ADT7476: ADT7490: * 6 voltage inputs - * 1 Imon input (not implemented) + * 1 Imon input * PECI support (not implemented) * 2 GPIO pins (not implemented) * system acoustics optimizations (not implemented) @@ -107,6 +107,7 @@ in2 VCC (4) VCC (4) VCC (4) VCC (3) in3 5VIN (20) 5VIN (20) in4 12VIN (21) 12VIN (21) in5 VTT (8) +in6 Imon (19) ==== =========== =========== ========= ========== Special Features diff --git a/Documentation/hwmon/aquacomputer_d5next.rst b/Documentation/hwmon/aquacomputer_d5next.rst index 94dc2d93d1..cb073c7947 100644 --- a/Documentation/hwmon/aquacomputer_d5next.rst +++ b/Documentation/hwmon/aquacomputer_d5next.rst @@ -16,6 +16,8 @@ Supported devices: * Aquacomputer Aquastream XT watercooling pump * Aquacomputer Aquastream Ultimate watercooling pump * Aquacomputer Poweradjust 3 fan controller +* Aquacomputer High Flow USB flow meter +* Aquacomputer MPS Flow devices Author: Aleksa Savic @@ -73,6 +75,11 @@ It also exposes pressure and flow speed readings. The Poweradjust 3 controller exposes a single external temperature sensor. +The High Flow USB exposes an internal and external temperature sensor, and a flow meter. + +The MPS Flow devices expose the same entries as the High Flow USB because they have +the same USB product ID and report sensors equivalently. + Depending on the device, not all sysfs and debugfs entries will be available. Writing to virtual temperature sensors is not currently supported. 
diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst index 7e3cd5b668..0bf99ba406 100644 --- a/Documentation/hwmon/asus_ec_sensors.rst +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -15,6 +15,7 @@ Supported boards: * ROG CROSSHAIR VIII HERO * ROG CROSSHAIR VIII IMPACT * ROG CROSSHAIR X670E HERO + * ROG CROSSHAIR X670E GENE * ROG MAXIMUS XI HERO * ROG MAXIMUS XI HERO (WI-FI) * ROG STRIX B550-E GAMING diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst index 88dadea85c..72f4e6065b 100644 --- a/Documentation/hwmon/index.rst +++ b/Documentation/hwmon/index.rst @@ -121,6 +121,7 @@ Hardware Monitoring Kernel Drivers ltc2947 ltc2978 ltc2990 + ltc2991 ltc3815 ltc4151 ltc4215 @@ -178,6 +179,7 @@ Hardware Monitoring Kernel Drivers peci-cputemp peci-dimmtemp pmbus + powerz powr1220 pxe1610 pwm-fan diff --git a/Documentation/hwmon/ltc2991.rst b/Documentation/hwmon/ltc2991.rst new file mode 100644 index 0000000000..15d8b4d7e4 --- /dev/null +++ b/Documentation/hwmon/ltc2991.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Kernel driver ltc2991 +===================== + +Supported chips: + + * Analog Devices LTC2991 + + Prefix: 'ltc2991' + + Addresses scanned: I2C 0x48 - 0x4f + + Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/2991ff.pdf + +Authors: + + - Antoniu Miclaus + + +Description +----------- + +This driver supports hardware monitoring for Analog Devices LTC2991 Octal I2C +Voltage, Current and Temperature Monitor. + +The LTC2991 is used to monitor system temperatures, voltages and currents. +Through the I2C serial interface, the eight monitors can individually measure +supply voltages and can be paired for differential measurements of current sense +resistors or temperature sensing transistors. Additional measurements include +internal temperature and internal VCC. + + +sysfs-Interface +--------------- + +The following attributes are supported. Limits are read-only.
+ +=============== ================= +inX_input: voltage input +currX_input: current input +tempX_input: temperature input +=============== ================= diff --git a/Documentation/hwmon/max31827.rst b/Documentation/hwmon/max31827.rst index b0971d05b8..9a1055a007 100644 --- a/Documentation/hwmon/max31827.rst +++ b/Documentation/hwmon/max31827.rst @@ -73,8 +73,8 @@ the conversion frequency to 1 conv/s. The conversion time varies depending on the resolution. The conversion time doubles with every bit of increased resolution. For 10 bit resolution 35ms are needed, while for 12 bit resolution (default) 140ms. When chip is in shutdown mode and a read operation is -requested, one-shot is triggered, the device waits for 140 (conversion time) + 1 -(error) ms, and only after that is the temperature value register read. +requested, one-shot is triggered, the device waits for 140 (conversion time) ms, +and only after that is the temperature value register read. The LSB of the temperature values is 0.0625 degrees Celsius, but the values of the temperatures are displayed in milli-degrees. This means, that some data is diff --git a/Documentation/hwmon/nct6683.rst b/Documentation/hwmon/nct6683.rst index 2e1408d174..3e7f6ee779 100644 --- a/Documentation/hwmon/nct6683.rst +++ b/Documentation/hwmon/nct6683.rst @@ -62,5 +62,6 @@ Intel DH87RL NCT6683D EC firmware version 1.0 build 04/03/13 Intel DH87MC NCT6683D EC firmware version 1.0 build 04/03/13 Intel DB85FL NCT6683D EC firmware version 1.0 build 04/03/13 ASRock X570 NCT6683D EC firmware version 1.0 build 06/28/19 +ASRock X670E NCT6686D EC firmware version 1.0 build 05/19/22 MSI B550 NCT6687D EC firmware version 1.0 build 05/07/20 =============== =============================================== diff --git a/Documentation/hwmon/powerz.rst b/Documentation/hwmon/powerz.rst new file mode 100644 index 0000000000..317084e0b7 --- /dev/null +++ b/Documentation/hwmon/powerz.rst @@ -0,0 +1,30 @@ +.. 
SPDX-License-Identifier: GPL-2.0-or-later + +Kernel driver POWERZ +==================== + +Supported chips: + + * ChargerLAB POWER-Z KM003C + + Prefix: 'powerz' + + Addresses scanned: - + +Author: + + - Thomas Weißschuh + +Description +----------- + +This driver implements support for the ChargerLAB POWER-Z USB-C power testing +family. + +The device communicates with a custom protocol over USB. + +The channel labels exposed via hwmon match the labels used by the on-device +display and the official POWER-Z PC software. + +As current can flow in both directions through the tester the sign of the +channel "curr1_input" (label "IBUS") indicates the direction. diff --git a/Documentation/hwmon/sch5627.rst b/Documentation/hwmon/sch5627.rst index ecb4fc84d0..8639dff234 100644 --- a/Documentation/hwmon/sch5627.rst +++ b/Documentation/hwmon/sch5627.rst @@ -33,3 +33,13 @@ The hardware monitoring part of the SMSC SCH5627 is accessed by talking through an embedded microcontroller. An application note describing the protocol for communicating with the microcontroller is available upon request. Please mail me if you want a copy. + + +Controlling fan speed +--------------------- + +The SCH5627 allows for partially controlling the fan speed. If a temperature +channel exceeds tempX_max, all fans are forced to maximum speed. The same is not +true for tempX_crit, presumably some other measures to cool down the system are +taken in this case. +In which way the value of fanX_min affects the fan speed is currently unknown. diff --git a/Documentation/i2c/fault-codes.rst b/Documentation/i2c/fault-codes.rst index 80b14e718b..b0864d1268 100644 --- a/Documentation/i2c/fault-codes.rst +++ b/Documentation/i2c/fault-codes.rst @@ -39,6 +39,10 @@ Also, codes returned by adapter probe methods follow rules which are specific to their host bus (such as PCI, or the platform bus). +EAFNOSUPPORT + Returned by I2C adapters not supporting 10 bit addresses when + they are requested to use such an address.
+ EAGAIN Returned by I2C adapters when they lose arbitration in master transmit mode: some other master was transmitting different diff --git a/Documentation/i2c/i2c-address-translators.rst b/Documentation/i2c/i2c-address-translators.rst index b22ce9f41e..6845c114e4 100644 --- a/Documentation/i2c/i2c-address-translators.rst +++ b/Documentation/i2c/i2c-address-translators.rst @@ -71,7 +71,7 @@ Transaction: - Physical I2C transaction on bus A, slave address 0x20 - ATR chip detects transaction on address 0x20, finds it in table, propagates transaction on bus B with address translated to 0x10, - keeps clock streched on bus A waiting for reply + keeps clock stretched on bus A waiting for reply - Slave X chip (on bus B) detects transaction at its own physical address 0x10 and replies normally - ATR chip stops clock stretching and forwards reply on bus A, diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index bd906407e3..9c8d1d046e 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -243,6 +243,12 @@ The output directory is often set using "O=..." on the commandline. The value can be overridden in which case the default value is ignored. +INSTALL_DTBS_PATH +----------------- +INSTALL_DTBS_PATH specifies where to install device tree blobs for +relocations required by build roots. This is not defined in the +makefile but the argument can be passed to make if needed. + KBUILD_ABS_SRCTREE -------------------------------------------------- Kbuild uses a relative path to point to the tree when possible. For instance, diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index e67eb261c9..ad118b7a18 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -53,7 +53,7 @@ knowledge about the kernel Makefiles, plus detailed knowledge about the public interface for kbuild. 
*Arch developers* are people who work on an entire architecture, such -as sparc or ia64. Arch developers need to know about the arch Makefile +as sparc or x86. Arch developers need to know about the arch Makefile as well as kbuild Makefiles. *Kbuild developers* are people who work on the kernel build system itself. @@ -937,6 +937,10 @@ Example:: # net/bpfilter/Makefile bpfilter_umh-userldflags += -static +To specify libraries linked to a userspace program, you can use +``-userldlibs``. The ``userldlibs`` syntax specifies libraries +linked to all userspace programs created in the current Makefile. + When linking bpfilter_umh, it will be passed the extra option -static. From command line, :ref:`USERCFLAGS and USERLDFLAGS ` will also be used. @@ -1623,6 +1627,13 @@ INSTALL_MOD_STRIP INSTALL_MOD_STRIP value will be used as the option(s) to the strip command. +INSTALL_DTBS_PATH + This variable specifies a prefix for relocations required by build + roots. It defines a place for installing the device tree blobs. Like + INSTALL_MOD_PATH, it isn't defined in the Makefile, but can be passed + by the user if desired. Otherwise it defaults to the kernel install + path. + Makefile language ================= diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst index 6b64072d4b..7ad4bfc2cc 100644 --- a/Documentation/maintainer/maintainer-entry-profile.rst +++ b/Documentation/maintainer/maintainer-entry-profile.rst @@ -101,7 +101,7 @@ to do something different in the near future. 
../doc-guide/maintainer-profile ../nvdimm/maintainer-entry-profile - ../riscv/patch-acceptance + ../arch/riscv/patch-acceptance ../driver-api/media/maintainer-entry-profile ../driver-api/vfio-pci-device-specific-driver-acceptance ../nvme/feature-and-quirk-policy diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 06e14efd86..d414e145f9 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -396,6 +396,10 @@ Memory barriers come in four basic varieties: (2) Address-dependency barriers (historical). + [!] This section is marked as HISTORICAL: For more up-to-date + information, including how compiler transformations related to pointer + comparisons can sometimes cause problems, see + Documentation/RCU/rcu_dereference.rst. An address-dependency barrier is a weaker form of read barrier. In the case where two loads are performed such that the second depends on the @@ -556,6 +560,9 @@ There are certain things that the Linux kernel memory barriers do not guarantee: ADDRESS-DEPENDENCY BARRIERS (HISTORICAL) ---------------------------------------- +[!] This section is marked as HISTORICAL: For more up-to-date information, +including how compiler transformations related to pointer comparisons can +sometimes cause problems, see Documentation/RCU/rcu_dereference.rst. 
As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for DEC Alpha, which means that about the only people who need to pay attention diff --git a/Documentation/misc-devices/eeprom.rst b/Documentation/misc-devices/eeprom.rst deleted file mode 100644 index 008249675c..0000000000 --- a/Documentation/misc-devices/eeprom.rst +++ /dev/null @@ -1,107 +0,0 @@ -==================== -Kernel driver eeprom -==================== - -Supported chips: - - * Any EEPROM chip in the designated address range - - Prefix: 'eeprom' - - Addresses scanned: I2C 0x50 - 0x57 - - Datasheets: Publicly available from: - - Atmel (www.atmel.com), - Catalyst (www.catsemi.com), - Fairchild (www.fairchildsemi.com), - Microchip (www.microchip.com), - Philips (www.semiconductor.philips.com), - Rohm (www.rohm.com), - ST (www.st.com), - Xicor (www.xicor.com), - and others. - - ========= ============= ============================================ - Chip Size (bits) Address - ========= ============= ============================================ - 24C01 1K 0x50 (shadows at 0x51 - 0x57) - 24C01A 1K 0x50 - 0x57 (Typical device on DIMMs) - 24C02 2K 0x50 - 0x57 - 24C04 4K 0x50, 0x52, 0x54, 0x56 - (additional data at 0x51, 0x53, 0x55, 0x57) - 24C08 8K 0x50, 0x54 (additional data at 0x51, 0x52, - 0x53, 0x55, 0x56, 0x57) - 24C16 16K 0x50 (additional data at 0x51 - 0x57) - Sony 2K 0x57 - - Atmel 34C02B 2K 0x50 - 0x57, SW write protect at 0x30-37 - Catalyst 34FC02 2K 0x50 - 0x57, SW write protect at 0x30-37 - Catalyst 34RC02 2K 0x50 - 0x57, SW write protect at 0x30-37 - Fairchild 34W02 2K 0x50 - 0x57, SW write protect at 0x30-37 - Microchip 24AA52 2K 0x50 - 0x57, SW write protect at 0x30-37 - ST M34C02 2K 0x50 - 0x57, SW write protect at 0x30-37 - ========= ============= ============================================ - - -Authors: - - Frodo Looijaard , - - Philip Edelbrock , - - Jean Delvare , - - Greg Kroah-Hartman , - - IBM Corp. 
- -Description ------------ - -This is a simple EEPROM module meant to enable reading the first 256 bytes -of an EEPROM (on a SDRAM DIMM for example). However, it will access serial -EEPROMs on any I2C adapter. The supported devices are generically called -24Cxx, and are listed above; however the numbering for these -industry-standard devices may vary by manufacturer. - -This module was a programming exercise to get used to the new project -organization laid out by Frodo, but it should be at least completely -effective for decoding the contents of EEPROMs on DIMMs. - -DIMMS will typically contain a 24C01A or 24C02, or the 34C02 variants. -The other devices will not be found on a DIMM because they respond to more -than one address. - -DDC Monitors may contain any device. Often a 24C01, which responds to all 8 -addresses, is found. - -Recent Sony Vaio laptops have an EEPROM at 0x57. We couldn't get the -specification, so it is guess work and far from being complete. - -The Microchip 24AA52/24LCS52, ST M34C02, and others support an additional -software write protect register at 0x30 - 0x37 (0x20 less than the memory -location). The chip responds to "write quick" detection at this address but -does not respond to byte reads. If this register is present, the lower 128 -bytes of the memory array are not write protected. Any byte data write to -this address will write protect the memory array permanently, and the -device will no longer respond at the 0x30-37 address. The eeprom driver -does not support this register. - -Lacking functionality ---------------------- - -* Full support for larger devices (24C04, 24C08, 24C16). These are not - typically found on a PC. These devices will appear as separate devices at - multiple addresses. - -* Support for really large devices (24C32, 24C64, 24C128, 24C256, 24C512). - These devices require two-byte address fields and are not supported. - -* Enable Writing. 
Again, no technical reason why not, but making it easy - to change the contents of the EEPROMs (on DIMMs anyway) also makes it easy - to disable the DIMMs (potentially preventing the computer from booting) - until the values are restored somehow. - -Use ---- - -After inserting the module (and any other required SMBus/i2c modules), you -should have some EEPROM directories in ``/sys/bus/i2c/devices/*`` of names such -as "0-0050". Inside each of these is a series of files, the eeprom file -contains the binary data from EEPROM. diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst index ecc40fbbcf..7de1679798 100644 --- a/Documentation/misc-devices/index.rst +++ b/Documentation/misc-devices/index.rst @@ -17,7 +17,6 @@ fit into other categories. ad525x_dpot apds990x bh1770glc - eeprom c2port dw-xdata-pcie ibmvmc diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index a20383d01a..1f7e0586b5 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -154,6 +154,8 @@ The monitoring overhead of this mechanism will arbitrarily increase as the size of the target workload grows. +.. _damon_design_region_based_sampling: + Region Based Sampling ~~~~~~~~~~~~~~~~~~~~~ @@ -163,9 +165,10 @@ assumption (pages in a region have the same access frequencies) is kept, only one page in the region is required to be checked. Thus, for each ``sampling interval``, DAMON randomly picks one page in each region, waits for one ``sampling interval``, checks whether the page is accessed meanwhile, and -increases the access frequency of the region if so. Therefore, the monitoring -overhead is controllable by setting the number of regions. DAMON allows users -to set the minimum and the maximum number of regions for the trade-off. +increases the access frequency counter of the region if so. The counter is +called ``nr_accesses`` of the region.
Therefore, the monitoring overhead is +controllable by setting the number of regions. DAMON allows users to set the +minimum and the maximum number of regions for the trade-off. This scheme, however, cannot preserve the quality of the output if the assumption is not guaranteed. @@ -190,6 +193,8 @@ In this way, DAMON provides its best-effort quality and minimal overhead while keeping the bounds users set for their trade-off. +.. _damon_design_age_tracking: + Age Tracking ~~~~~~~~~~~~ @@ -254,7 +259,8 @@ works, DAMON provides a feature called Data Access Monitoring-based Operation Schemes (DAMOS). It lets users specify their desired schemes at a high level. For such specifications, DAMON starts monitoring, finds regions having the access pattern of interest, and applies the user-desired operation actions -to the regions as soon as found. +to the regions, for every user-specified time interval called +``apply_interval``. .. _damon_design_damos_action: @@ -471,3 +477,15 @@ modules for proactive reclamation and LRU lists manipulation are provided. For more detail, please read the usage documents for those (:doc:`/admin-guide/mm/damon/reclaim` and :doc:`/admin-guide/mm/damon/lru_sort`). + + +.. _damon_design_execution_model_and_data_structures: + +Execution Model and Data Structures +=================================== + +The monitoring-related information including the monitoring request +specification and DAMON-based operation schemes are stored in a data structure +called DAMON ``context``. DAMON executes each context with a kernel thread +called ``kdamond``. Multiple kdamonds could run in parallel, for different +types of monitoring. 
diff --git a/Documentation/mm/overcommit-accounting.rst b/Documentation/mm/overcommit-accounting.rst index a4895d6fc1..e2263477f6 100644 --- a/Documentation/mm/overcommit-accounting.rst +++ b/Documentation/mm/overcommit-accounting.rst @@ -8,8 +8,7 @@ The Linux kernel supports the following overcommit handling modes Heuristic overcommit handling. Obvious overcommits of address space are refused. Used for a typical system. It ensures a seriously wild allocation fails while allowing overcommit to - reduce swap usage. root is allowed to allocate slightly more - memory in this mode. This is the default. + reduce swap usage. This is the default. 1 Always overcommit. Appropriate for some scientific diff --git a/Documentation/mm/page_tables.rst b/Documentation/mm/page_tables.rst index 7840c18917..be47b192a5 100644 --- a/Documentation/mm/page_tables.rst +++ b/Documentation/mm/page_tables.rst @@ -152,3 +152,130 @@ Page table handling code that wishes to be architecture-neutral, such as the virtual memory manager, will need to be written so that it traverses all of the currently five levels. This style should also be preferred for architecture-specific code, so as to be robust to future changes. + + +MMU, TLB, and Page Faults +========================= + +The `Memory Management Unit (MMU)` is a hardware component that handles virtual +to physical address translations. It may use relatively small caches in hardware +called `Translation Lookaside Buffers (TLBs)` and `Page Walk Caches` to speed up +these translations. + +When the CPU accesses a memory location, it provides a virtual address to the MMU, +which checks if there is an existing translation in the TLB or in the Page +Walk Caches (on architectures that support them). If no translation is found, +the MMU uses page walks to determine the physical address and create the map. + +The dirty bit for a page is set (i.e., turned on) when the page is written to. +Each page of memory has associated permission and dirty bits.
The latter +indicate that the page has been modified since it was loaded into memory. + +If nothing prevents it, eventually the physical memory can be accessed and the +requested operation on the physical frame is performed. + +There are several reasons why the MMU can't find certain translations. It could +happen because the CPU is trying to access memory that the current task is not +permitted to, or because the data is not present in physical memory. + +When these conditions happen, the MMU triggers page faults, which are types of +exceptions that signal the CPU to pause the current execution and run a special +function to handle the mentioned exceptions. + +There are common and expected causes of page faults. These are triggered by +process management optimization techniques called "Lazy Allocation" and +"Copy-on-Write". Page faults may also happen when frames have been swapped out +to persistent storage (swap partition or file) and evicted from their physical +locations. + +These techniques improve memory efficiency, reduce latency, and minimize space +occupation. This document won't go deeper into the details of "Lazy Allocation" +and "Copy-on-Write" because these subjects are out of scope as they belong to +Process Address Management. + +Swapping differentiates itself from the other mentioned techniques because it's +undesirable since it's performed as a means to reduce memory under heavy +pressure. + +Swapping can't work for memory mapped by kernel logical addresses. These are a +subset of the kernel virtual space that directly maps a contiguous range of +physical memory. Given any logical address, its physical address is determined +with simple arithmetic on an offset. Accesses to logical addresses are fast +because they avoid the need for complex page table lookups at the expense of +frames not being evictable and pageable out.
+ +If the kernel fails to make room for the data that must be present in the +physical frames, the kernel invokes the out-of-memory (OOM) killer to make room +by terminating lower priority processes until pressure reduces under a safe +threshold. + +Additionally, page faults may also be caused by code bugs or by maliciously +crafted addresses that the CPU is instructed to access. A thread of a process +could use instructions to address (non-shared) memory which does not belong to +its own address space, or could try to execute an instruction that wants to write +to a read-only location. + +If the above-mentioned conditions happen in user-space, the kernel sends a +`Segmentation Fault` (SIGSEGV) signal to the current thread. That signal usually +causes the termination of the thread and of the process it belongs to. + +This document is going to simplify and show a high altitude view of how the +Linux kernel handles these page faults, creates tables and tables' entries, +checks if memory is present and, if not, requests to load data from persistent +storage or from other devices, and updates the MMU and its caches. + +The first steps are architecture dependent. Most architectures jump to +`do_page_fault()`, whereas the x86 interrupt handler is defined by the +`DEFINE_IDTENTRY_RAW_ERRORCODE()` macro which calls `handle_page_fault()`. + +Whatever the routes, all architectures end up invoking +`handle_mm_fault()` which, in turn, (likely) ends up calling +`__handle_mm_fault()` to carry out the actual work of allocating the page +tables. + +The unfortunate case of not being able to call `__handle_mm_fault()` means +that the virtual address is pointing to areas of physical memory which are not +permitted to be accessed (at least from the current context). This +condition resolves to the kernel sending the above-mentioned SIGSEGV signal +to the process and leads to the consequences already explained.
+ +`__handle_mm_fault()` carries out its work by calling several functions to +find the entry's offsets of the upper layers of the page tables and allocate +the tables that it may need. + +The functions that look for the offset have names like `*_offset()`, where the +"*" is for pgd, p4d, pud, pmd, pte; instead the functions to allocate the +corresponding tables, layer by layer, are called `*_alloc`, using the +above-mentioned convention to name them after the corresponding types of tables +in the hierarchy. + +The page table walk may end at one of the middle or upper layers (PMD, PUD). + +Linux supports larger page sizes than the usual 4KB (i.e., the so called +`huge pages`). When using these kinds of larger pages, higher level pages can +directly map them, with no need to use lower level page entries (PTE). Huge +pages contain large contiguous physical regions that usually span from 2MB to +1GB. They are respectively mapped by the PMD and PUD page entries. + +The huge pages bring with them several benefits like reduced TLB pressure, +reduced page table overhead, memory allocation efficiency, and performance +improvement for certain workloads. However, these benefits come with +trade-offs, like wasted memory and allocation challenges. + +At the very end of the walk with allocations, if it didn't return errors, +`__handle_mm_fault()` finally calls `handle_pte_fault()`, which via `do_fault()` +performs one of `do_read_fault()`, `do_cow_fault()`, `do_shared_fault()`. +"read", "cow", "shared" give hints about the reasons and the kind of fault it's +handling. + +The actual implementation of the workflow is very complex. Its design allows +Linux to handle page faults in a way that is tailored to the specific +characteristics of each architecture, while still sharing a common overall +structure. 
+ +To conclude this high altitude view of how Linux handles page faults, let's +add that the page faults handler can be disabled and enabled respectively with +`pagefault_disable()` and `pagefault_enable()`. + +Several code paths make use of the latter two functions because they need to +disable traps into the page faults handler, mostly to prevent deadlocks. diff --git a/Documentation/mm/vmemmap_dedup.rst b/Documentation/mm/vmemmap_dedup.rst index 59891f7242..593ede6d31 100644 --- a/Documentation/mm/vmemmap_dedup.rst +++ b/Documentation/mm/vmemmap_dedup.rst @@ -211,7 +211,7 @@ the device (altmap). The following page sizes are supported in DAX: PAGE_SIZE (4K on x86_64), PMD_SIZE (2M on x86_64) and PUD_SIZE (1G on x86_64). -For powerpc equivalent details see Documentation/powerpc/vmemmap_dedup.rst +For powerpc equivalent details see Documentation/arch/powerpc/vmemmap_dedup.rst The differences with HugeTLB are relatively minor. diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 9806c44f60..c58f7153fc 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -13,6 +13,11 @@ $defs: type: [ string, integer ] pattern: ^[0-9A-Za-z_]+( - 1)?$ minimum: 0 + len-or-limit: + # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + type: [ string, integer ] + pattern: ^[su](8|16|32|64)-(min|max)$ + minimum: 0 # Schema for specs title: Protocol @@ -26,10 +31,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c ] @@ -46,6 +47,12 @@ properties: max-by-define: description: Makes the number of attributes and commands be specified by a define, not an enum value. type: boolean + cmd-max-name: + description: Name of the define for the last operation in the list.
+ type: string + cmd-cnt-name: + description: The explicit name for constant holding the count of operations (last operation + 1). + type: string # End genetlink-c definitions: @@ -142,13 +149,14 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: type: string type: &attr-type - enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64, + enum: [ unused, pad, flag, binary, + uint, sint, u8, u16, u32, u64, s32, s64, string, nest, array-nest, nest-type-value ] doc: description: Documentation of the attribute. @@ -187,13 +195,19 @@ properties: type: string min: description: Min value for an integer attribute. - type: integer + $ref: '#/$defs/len-or-limit' + max: + description: Max value for an integer attribute. + $ref: '#/$defs/len-or-limit' min-len: description: Min length for a binary attribute. $ref: '#/$defs/len-or-define' max-len: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' + exact-len: + description: Exact length for a string or a binary attribute. + $ref: '#/$defs/len-or-define' sub-type: *attr-type display-hint: &display-hint description: | @@ -215,6 +229,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object @@ -275,6 +301,11 @@ properties: type: array items: enum: [ strict, dump, dump-strict ] + config-cond: + description: | + Name of the kernel config option gating the presence of + the operation, without the 'CONFIG_' prefix. + type: string do: &subop-type description: Main command handler. 
type: object diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 12a0a04560..9387030883 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -13,6 +13,11 @@ $defs: type: [ string, integer ] pattern: ^[0-9A-Za-z_]+( - 1)?$ minimum: 0 + len-or-limit: + # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + type: [ string, integer ] + pattern: ^[su](8|16|32|64)-(min|max)$ + minimum: 0 # Schema for specs title: Protocol @@ -26,10 +31,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c, genetlink-legacy ] # Trim @@ -46,6 +47,12 @@ properties: max-by-define: description: Makes the number of attributes and commands be specified by a define, not an enum value. type: boolean + cmd-max-name: + description: Name of the define for the last operation in the list. + type: string + cmd-cnt-name: + description: The explicit name for constant holding the count of operations (last operation + 1). + type: string # End genetlink-c # Start genetlink-legacy kernel-policy: @@ -53,6 +60,10 @@ properties: Defines if the input policy in the kernel is global, per-operation, or split per operation type. Default is split. enum: [ split, per-op, global ] + version: + description: Generic Netlink family version. Default is 1. 
+ type: integer + minimum: 1 # End genetlink-legacy definitions: @@ -180,14 +191,15 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: type: string type: &attr-type description: The netlink attribute type - enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64, + enum: [ unused, pad, flag, binary, bitfield32, + uint, sint, u8, u16, u32, u64, s32, s64, string, nest, array-nest, nest-type-value ] doc: description: Documentation of the attribute. @@ -226,13 +238,19 @@ properties: type: string min: description: Min value for an integer attribute. - type: integer + $ref: '#/$defs/len-or-limit' + max: + description: Max value for an integer attribute. + $ref: '#/$defs/len-or-limit' min-len: description: Min length for a binary attribute. $ref: '#/$defs/len-or-define' max-len: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' + exact-len: + description: Exact length for a string or a binary attribute. + $ref: '#/$defs/len-or-define' sub-type: *attr-type display-hint: *display-hint # Start genetlink-c @@ -254,6 +272,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object @@ -316,12 +346,17 @@ properties: description: Command flags. type: array items: - enum: [ admin-perm ] + enum: [ admin-perm, uns-admin-perm ] dont-validate: description: Kernel attribute validation flags. type: array items: enum: [ strict, dump, dump-strict ] + config-cond: + description: | + Name of the kernel config option gating the presence of + the operation, without the 'CONFIG_' prefix. 
+ type: string # Start genetlink-legacy fixed-header: *fixed-header # End genetlink-legacy diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 3d338c48bf..3283bf458f 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -13,6 +13,11 @@ $defs: type: [ string, integer ] pattern: ^[0-9A-Za-z_]+( - 1)?$ minimum: 0 + len-or-limit: + # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + type: [ string, integer ] + pattern: ^[su](8|16|32|64)-(min|max)$ + minimum: 0 # Schema for specs title: Protocol @@ -26,10 +31,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink ] @@ -115,13 +116,14 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: type: string type: &attr-type - enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64, + enum: [ unused, pad, flag, binary, + uint, sint, u8, u16, u32, u64, s32, s64, string, nest, array-nest, nest-type-value ] doc: description: Documentation of the attribute. @@ -160,13 +162,19 @@ properties: type: string min: description: Min value for an integer attribute. - type: integer + $ref: '#/$defs/len-or-limit' + max: + description: Max value for an integer attribute. + $ref: '#/$defs/len-or-limit' min-len: description: Min length for a binary attribute. $ref: '#/$defs/len-or-define' max-len: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' + exact-len: + description: Exact length for a string or a binary attribute. 
+ $ref: '#/$defs/len-or-define' sub-type: *attr-type display-hint: &display-hint description: | @@ -184,6 +192,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object @@ -244,6 +264,11 @@ properties: type: array items: enum: [ strict, dump, dump-strict ] + config-cond: + description: | + Name of the kernel config option gating the presence of + the operation, without the 'CONFIG_' prefix. + type: string do: &subop-type description: Main command handler. type: object diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 8967978764..775cce8c54 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -47,6 +47,12 @@ properties: max-by-define: description: Makes the number of attributes and commands be specified by a define, not an enum value. type: boolean + cmd-max-name: + description: Name of the define for the last operation in the list. + type: string + cmd-cnt-name: + description: The explicit name for constant holding the count of operations (last operation + 1). + type: string # End genetlink-c # Start genetlink-legacy kernel-policy: @@ -187,7 +193,7 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: @@ -240,6 +246,9 @@ properties: max-len: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' + exact-len: + description: Exact length for a string or a binary attribute. 
+ $ref: '#/$defs/len-or-define' sub-type: *attr-type display-hint: *display-hint # Start genetlink-c @@ -261,6 +270,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 065661acb8..572d83a414 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -15,6 +15,165 @@ definitions: name: ingress - name: egress + - + type: enum + name: port-type + entries: + - + name: notset + - + name: auto + - + name: eth + - + name: ib + - + type: enum + name: port-flavour + entries: + - + name: physical + - + name: cpu + - + name: dsa + - + name: pci_pf + - + name: pci_vf + - + name: virtual + - + name: unused + - + name: pci_sf + - + type: enum + name: port-fn-state + entries: + - + name: inactive + - + name: active + - + type: enum + name: port-fn-opstate + entries: + - + name: detached + - + name: attached + - + type: enum + name: port-fn-attr-cap + entries: + - + name: roce-bit + - + name: migratable-bit + - + name: ipsec-crypto-bit + - + name: ipsec-packet-bit + - + type: enum + name: sb-threshold-type + entries: + - + name: static + - + name: dynamic + - + type: enum + name: eswitch-mode + entries: + - + name: legacy + - + name: switchdev + - + type: enum + name: eswitch-inline-mode + entries: + - + name: none + - + name: link + - + name: network + - + name: transport + - + type: enum + name: eswitch-encap-mode + entries: + - + name: none + - + name: basic + - + type: enum + name: dpipe-match-type + entries: + - + name: field-exact + - + type: enum + name: dpipe-action-type + entries: + - + name: field-modify + - + type: enum + name: dpipe-field-mapping-type + entries: + - + 
name: none + - + name: ifindex + - + type: enum + name: resource-unit + entries: + - + name: entry + - + type: enum + name: reload-action + entries: + - + name: driver-reinit + value: 1 + - + name: fw-activate + - + type: enum + name: param-cmode + entries: + - + name: runtime + - + name: driverinit + - + name: permanent + - + type: enum + name: flash-overwrite + entries: + - + name: settings-bit + - + name: identifiers-bit + - + type: enum + name: trap-action + entries: + - + name: drop + - + name: trap + - + name: mirror attribute-sets: - @@ -31,6 +190,17 @@ attribute-sets: - name: port-index type: u32 + - + name: port-type + type: u16 + enum: port-type + + # TODO: fill in the attributes in between + + - + name: port-split-count + type: u32 + value: 9 # TODO: fill in the attributes in between @@ -45,18 +215,224 @@ attribute-sets: name: sb-pool-index type: u16 value: 17 - - name: sb-pool-type type: u8 enum: sb-pool-type + - + name: sb-pool-size + type: u32 + - + name: sb-pool-threshold-type + type: u8 + enum: sb-threshold-type + - + name: sb-threshold + type: u32 + - + name: sb-tc-index + type: u16 + value: 22 # TODO: fill in the attributes in between - - name: sb-tc-index + name: eswitch-mode type: u16 - value: 22 + value: 25 + enum: eswitch-mode + + - + name: eswitch-inline-mode + type: u16 + enum: eswitch-inline-mode + - + name: dpipe-tables + type: nest + nested-attributes: dl-dpipe-tables + - + name: dpipe-table + type: nest + multi-attr: true + nested-attributes: dl-dpipe-table + - + name: dpipe-table-name + type: string + - + name: dpipe-table-size + type: u64 + - + name: dpipe-table-matches + type: nest + nested-attributes: dl-dpipe-table-matches + - + name: dpipe-table-actions + type: nest + nested-attributes: dl-dpipe-table-actions + - + name: dpipe-table-counters-enabled + type: u8 + - + name: dpipe-entries + type: nest + nested-attributes: dl-dpipe-entries + - + name: dpipe-entry + type: nest + multi-attr: true + nested-attributes: dl-dpipe-entry + - + 
name: dpipe-entry-index + type: u64 + - + name: dpipe-entry-match-values + type: nest + nested-attributes: dl-dpipe-entry-match-values + - + name: dpipe-entry-action-values + type: nest + nested-attributes: dl-dpipe-entry-action-values + - + name: dpipe-entry-counter + type: u64 + - + name: dpipe-match + type: nest + multi-attr: true + nested-attributes: dl-dpipe-match + - + name: dpipe-match-value + type: nest + multi-attr: true + nested-attributes: dl-dpipe-match-value + - + name: dpipe-match-type + type: u32 + enum: dpipe-match-type + - + name: dpipe-action + type: nest + multi-attr: true + nested-attributes: dl-dpipe-action + - + name: dpipe-action-value + type: nest + multi-attr: true + nested-attributes: dl-dpipe-action-value + - + name: dpipe-action-type + type: u32 + enum: dpipe-action-type + - + name: dpipe-value + type: binary + - + name: dpipe-value-mask + type: binary + - + name: dpipe-value-mapping + type: u32 + - + name: dpipe-headers + type: nest + nested-attributes: dl-dpipe-headers + - + name: dpipe-header + type: nest + multi-attr: true + nested-attributes: dl-dpipe-header + - + name: dpipe-header-name + type: string + - + name: dpipe-header-id + type: u32 + - + name: dpipe-header-fields + type: nest + nested-attributes: dl-dpipe-header-fields + - + name: dpipe-header-global + type: u8 + - + name: dpipe-header-index + type: u32 + - + name: dpipe-field + type: nest + multi-attr: true + nested-attributes: dl-dpipe-field + - + name: dpipe-field-name + type: string + - + name: dpipe-field-id + type: u32 + - + name: dpipe-field-bitwidth + type: u32 + - + name: dpipe-field-mapping-type + type: u32 + enum: dpipe-field-mapping-type + - + name: pad + type: pad + - + name: eswitch-encap-mode + type: u8 + value: 62 + enum: eswitch-encap-mode + - + name: resource-list + type: nest + nested-attributes: dl-resource-list + - + name: resource + type: nest + multi-attr: true + nested-attributes: dl-resource + - + name: resource-name + type: string + - + name: 
resource-id + type: u64 + - + name: resource-size + type: u64 + - + name: resource-size-new + type: u64 + - + name: resource-size-valid + type: u8 + - + name: resource-size-min + type: u64 + - + name: resource-size-max + type: u64 + - + name: resource-size-gran + type: u64 + - + name: resource-unit + type: u8 + enum: resource-unit + - + name: resource-occ + type: u64 + - + name: dpipe-table-resource-id + type: u64 + - + name: dpipe-table-resource-units + type: u64 + - + name: port-flavour + type: u16 + enum: port-flavour # TODO: fill in the attributes in between @@ -67,17 +443,41 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: param-type + type: u8 + value: 83 + + # TODO: fill in the attributes in between + + - + name: param-value-cmode + type: u8 + enum: param-cmode + value: 87 - name: region-name type: string - value: 88 # TODO: fill in the attributes in between + - + name: region-snapshot-id + type: u32 + value: 92 + + # TODO: fill in the attributes in between + + - + name: region-chunk-addr + type: u64 + value: 96 + - + name: region-chunk-len + type: u64 - name: info-driver-name type: string - value: 98 - name: info-serial-number type: string @@ -105,6 +505,29 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: fmsg + type: nest + nested-attributes: dl-fmsg + value: 106 + - + name: fmsg-obj-nest-start + type: flag + - + name: fmsg-pair-nest-start + type: flag + - + name: fmsg-arr-nest-start + type: flag + - + name: fmsg-nest-end + type: flag + - + name: fmsg-obj-name + type: string + + # TODO: fill in the attributes in between + - name: health-reporter-name type: string @@ -112,10 +535,37 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: health-reporter-graceful-period + type: u64 + value: 120 + - + name: health-reporter-auto-recover + type: u8 + - + name: flash-update-file-name + type: string + - + name: flash-update-component + type: string + + # TODO: fill in the attributes in between + + - 
+ name: port-pci-pf-number + type: u16 + value: 127 + + # TODO: fill in the attributes in between + - name: trap-name type: string value: 130 + - + name: trap-action + type: u8 + enum: trap-action # TODO: fill in the attributes in between @@ -131,23 +581,68 @@ attribute-sets: # TODO: fill in the attributes in between - - name: trap-policer-id + name: netns-fd + type: u32 + value: 138 + - + name: netns-pid + type: u32 + - + name: netns-id type: u32 - value: 142 # TODO: fill in the attributes in between - - name: reload-action + name: health-reporter-auto-dump type: u8 - value: 153 + value: 141 + - + name: trap-policer-id + type: u32 + - + name: trap-policer-rate + type: u64 + - + name: trap-policer-burst + type: u64 + - + name: port-function + type: nest + nested-attributes: dl-port-function # TODO: fill in the attributes in between + - + name: port-controller-number + type: u32 + value: 150 + + # TODO: fill in the attributes in between + + - + name: flash-update-overwrite-mask + type: bitfield32 + enum: flash-overwrite + enum-as-flags: True + value: 152 + - + name: reload-action + type: u8 + enum: reload-action + - + name: reload-actions-performed + type: bitfield32 + enum: reload-action + enum-as-flags: True + - + name: reload-limits + type: bitfield32 + enum: reload-action + enum-as-flags: True - name: dev-stats type: nest - value: 156 nested-attributes: dl-dev-stats - name: reload-stats @@ -181,10 +676,26 @@ attribute-sets: # TODO: fill in the attributes in between + - + name: port-pci-sf-number + type: u32 + value: 164 + + # TODO: fill in the attributes in between + + - + name: rate-tx-share + type: u64 + value: 166 + - + name: rate-tx-max + type: u64 - name: rate-node-name type: string - value: 168 + - + name: rate-parent-node-name + type: string # TODO: fill in the attributes in between @@ -193,60 +704,329 @@ attribute-sets: type: u32 value: 171 + # TODO: fill in the attributes in between + + - + name: linecard-type + type: string + value: 173 + + # TODO: 
fill in the attributes in between + + - + name: selftests + type: nest + value: 176 + nested-attributes: dl-selftest-id + - + name: rate-tx-priority + type: u32 + - + name: rate-tx-weight + type: u32 + - + name: region-direct + type: flag + - name: dl-dev-stats subset-of: devlink attributes: - name: reload-stats - type: nest - name: remote-reload-stats - type: nest - name: dl-reload-stats subset-of: devlink attributes: - name: reload-action-info - type: nest - name: dl-reload-act-info subset-of: devlink attributes: - name: reload-action - type: u8 - name: reload-action-stats - type: nest - name: dl-reload-act-stats subset-of: devlink attributes: - name: reload-stats-entry - type: nest - name: dl-reload-stats-entry subset-of: devlink attributes: - name: reload-stats-limit + - + name: reload-stats-value + - + name: dl-info-version + subset-of: devlink + attributes: + - + name: info-version-name + - + name: info-version-value + - + name: dl-port-function + name-prefix: devlink-port-fn-attr- + attr-max-name: devlink-port-function-attr-max + attributes: + - + name-prefix: devlink-port-function-attr- + name: hw-addr + type: binary + value: 1 + - + name: state + type: u8 + enum: port-fn-state + - + name: opstate type: u8 + enum: port-fn-opstate + - + name: caps + type: bitfield32 + enum: port-fn-attr-cap + enum-as-flags: True + + - + name: dl-dpipe-tables + subset-of: devlink + attributes: + - + name: dpipe-table + + - + name: dl-dpipe-table + subset-of: devlink + attributes: + - + name: dpipe-table-name + - + name: dpipe-table-size + - + name: dpipe-table-name + - + name: dpipe-table-size + - + name: dpipe-table-matches + - + name: dpipe-table-actions + - + name: dpipe-table-counters-enabled + - + name: dpipe-table-resource-id + - + name: dpipe-table-resource-units + + - + name: dl-dpipe-table-matches + subset-of: devlink + attributes: + - + name: dpipe-match + + - + name: dl-dpipe-table-actions + subset-of: devlink + attributes: + - + name: dpipe-action + + - + name: 
dl-dpipe-entries + subset-of: devlink + attributes: + - + name: dpipe-entry + + - + name: dl-dpipe-entry + subset-of: devlink + attributes: + - + name: dpipe-entry-index + - + name: dpipe-entry-match-values + - + name: dpipe-entry-action-values + - + name: dpipe-entry-counter + + - + name: dl-dpipe-entry-match-values + subset-of: devlink + attributes: + - + name: dpipe-match-value + + - + name: dl-dpipe-entry-action-values + subset-of: devlink + attributes: + - + name: dpipe-action-value + + - + name: dl-dpipe-match + subset-of: devlink + attributes: + - + name: dpipe-match-type + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-index + - + name: dpipe-field-id + + - + name: dl-dpipe-match-value + subset-of: devlink + attributes: + - + name: dpipe-match + - + name: dpipe-value + - + name: dpipe-value-mask + - + name: dpipe-value-mapping + + - + name: dl-dpipe-action + subset-of: devlink + attributes: + - + name: dpipe-action-type + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-index + - + name: dpipe-field-id + + - + name: dl-dpipe-action-value + subset-of: devlink + attributes: + - + name: dpipe-action + - + name: dpipe-value + - + name: dpipe-value-mask + - + name: dpipe-value-mapping + + - + name: dl-dpipe-headers + subset-of: devlink + attributes: + - + name: dpipe-header + + - + name: dl-dpipe-header + subset-of: devlink + attributes: + - + name: dpipe-header-name + - + name: dpipe-header-id + - + name: dpipe-header-global + - + name: dpipe-header-fields + + - + name: dl-dpipe-header-fields + subset-of: devlink + attributes: + - + name: dpipe-field + + - + name: dl-dpipe-field + subset-of: devlink + attributes: + - + name: dpipe-field-name + - + name: dpipe-field-id + - + name: dpipe-field-bitwidth + - + name: dpipe-field-mapping-type + + - + name: dl-resource + subset-of: devlink + attributes: + # - + # name: resource-list + # This is currently unsupported due to circular dependency + - + 
name: resource-name + - + name: resource-id + - + name: resource-size + - + name: resource-size-new + - + name: resource-size-valid + - + name: resource-size-min + - + name: resource-size-max + - + name: resource-size-gran + - + name: resource-unit + - + name: resource-occ + + - + name: dl-resource-list + subset-of: devlink + attributes: + - + name: resource + + - + name: dl-fmsg + subset-of: devlink + attributes: + - + name: fmsg-obj-nest-start + - + name: fmsg-pair-nest-start + - + name: fmsg-arr-nest-start + - + name: fmsg-nest-end - - name: reload-stats-value - type: u32 + name: fmsg-obj-name + - - name: dl-info-version - subset-of: devlink + name: dl-selftest-id + name-prefix: devlink-attr-selftest-id- attributes: - - name: info-version-name - type: string - - - name: info-version-value - type: string + name: flash + type: flag operations: enum-model: directional @@ -255,10 +1035,7 @@ operations: name: get doc: Get devlink instances. attribute-set: devlink - dont-validate: - - strict - - dump - + dont-validate: [ strict, dump ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -273,7 +1050,6 @@ operations: - bus-name - dev-name - reload-failed - - reload-action - dev-stats dump: reply: *get-reply @@ -282,9 +1058,7 @@ operations: name: port-get doc: Get devlink port instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit-port post: devlink-nl-post-doit @@ -303,16 +1077,90 @@ operations: reply: value: 3 # due to a bug, port dump returns DEVLINK_CMD_NEW attributes: *port-id-attrs + - + name: port-set + doc: Set devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-type + - port-function + + - + name: port-new + doc: Create devlink port instances. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-flavour + - port-pci-pf-number + - port-pci-sf-number + - port-controller-number + reply: + value: 7 + attributes: *port-id-attrs + + - + name: port-del + doc: Delete devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs + + - + name: port-split + doc: Split devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - port-split-count - # TODO: fill in the operations in between + - + name: port-unsplit + doc: Unsplit devlink port instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs - name: sb-get doc: Get shared buffer instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -330,15 +1178,11 @@ operations: attributes: *dev-id-attrs reply: *sb-get-reply - # TODO: fill in the operations in between - - name: sb-pool-get doc: Get shared buffer pool instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -357,15 +1201,29 @@ operations: attributes: *dev-id-attrs reply: *sb-pool-get-reply - # TODO: fill in the operations in between + - + name: sb-pool-set + doc: Set shared buffer pool instances.
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - sb-index + - sb-pool-index + - sb-pool-threshold-type + - sb-pool-size - name: sb-port-pool-get doc: Get shared buffer port-pool combinations and threshold. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit-port post: devlink-nl-post-doit @@ -374,81 +1232,318 @@ operations: attributes: &sb-port-pool-id-attrs - bus-name - dev-name - - port-index - - sb-index - - sb-pool-index - reply: &sb-port-pool-get-reply - value: 21 - attributes: *sb-port-pool-id-attrs - dump: - request: - attributes: *dev-id-attrs - reply: *sb-port-pool-get-reply - - # TODO: fill in the operations in between + - port-index + - sb-index + - sb-pool-index + reply: &sb-port-pool-get-reply + value: 21 + attributes: *sb-port-pool-id-attrs + dump: + request: + attributes: *dev-id-attrs + reply: *sb-port-pool-get-reply + + - + name: sb-port-pool-set + doc: Set shared buffer port-pool combinations and threshold. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-index + - sb-threshold + + - + name: sb-tc-pool-bind-get + doc: Get shared buffer port-TC to pool bindings and threshold. 
+ attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + value: 23 + attributes: &sb-tc-pool-bind-id-attrs + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-type + - sb-tc-index + reply: &sb-tc-pool-bind-get-reply + value: 25 + attributes: *sb-tc-pool-bind-id-attrs + dump: + request: + attributes: *dev-id-attrs + reply: *sb-tc-pool-bind-get-reply + + - + name: sb-tc-pool-bind-set + doc: Set shared buffer port-TC to pool bindings and threshold. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - sb-index + - sb-pool-index + - sb-pool-type + - sb-tc-index + - sb-threshold + + - + name: sb-occ-snapshot + doc: Take occupancy snapshot of shared buffer. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + value: 27 + attributes: + - bus-name + - dev-name + - sb-index + + - + name: sb-occ-max-clear + doc: Clear occupancy watermarks of shared buffer. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - sb-index + + - + name: eswitch-get + doc: Get eswitch attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: *dev-id-attrs + reply: + value: 29 + attributes: &eswitch-attrs + - bus-name + - dev-name + - eswitch-mode + - eswitch-inline-mode + - eswitch-encap-mode + + - + name: eswitch-set + doc: Set eswitch attributes. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: *eswitch-attrs + + - + name: dpipe-table-get + doc: Get dpipe table attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + reply: + value: 31 + attributes: + - bus-name + - dev-name + - dpipe-tables + + - + name: dpipe-entries-get + doc: Get dpipe entries attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + reply: + attributes: + - bus-name + - dev-name + - dpipe-entries + + - + name: dpipe-headers-get + doc: Get dpipe headers attributes. + attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + reply: + attributes: + - bus-name + - dev-name + - dpipe-headers + + - + name: dpipe-table-counters-set + doc: Set dpipe counter attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - dpipe-table-name + - dpipe-table-counters-enabled + + - + name: resource-set + doc: Set resource attributes. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - resource-id + - resource-size + + - + name: resource-dump + doc: Get resource attributes. 
+ attribute-set: devlink + dont-validate: [ strict ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + reply: + value: 36 + attributes: + - bus-name + - dev-name + - resource-list - - name: sb-tc-pool-bind-get - doc: Get shared buffer port-TC to pool bindings and threshold. + name: reload + doc: Reload devlink. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] + flags: [ admin-perm ] do: - pre: devlink-nl-pre-doit-port + pre: devlink-nl-pre-doit post: devlink-nl-post-doit request: - value: 23 - attributes: &sb-tc-pool-bind-id-attrs + attributes: - bus-name - dev-name - - port-index - - sb-index - - sb-pool-type - - sb-tc-index - reply: &sb-tc-pool-bind-get-reply - value: 25 - attributes: *sb-tc-pool-bind-id-attrs - dump: - request: - attributes: *dev-id-attrs - reply: *sb-tc-pool-bind-get-reply - - # TODO: fill in the operations in between + - reload-action + - reload-limits + - netns-pid + - netns-fd + - netns-id + reply: + attributes: + - bus-name + - dev-name + - reload-actions-performed - name: param-get doc: Get param instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit request: - value: 38 attributes: &param-id-attrs - bus-name - dev-name - param-name reply: &param-get-reply - value: 38 attributes: *param-id-attrs dump: request: attributes: *dev-id-attrs reply: *param-get-reply - # TODO: fill in the operations in between + - + name: param-set + doc: Set param instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - param-name + - param-type + # param-value-data is missing here as the type is variable + - param-value-cmode - name: region-get doc: Get region instances.
attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit-port-optional post: devlink-nl-post-doit @@ -467,16 +1562,97 @@ operations: attributes: *dev-id-attrs reply: *region-get-reply - # TODO: fill in the operations in between + - + name: region-new + doc: Create region snapshot. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + value: 44 + attributes: &region-snapshot-id-attrs + - bus-name + - dev-name + - port-index + - region-name + - region-snapshot-id + reply: + value: 44 + attributes: *region-snapshot-id-attrs + + - + name: region-del + doc: Delete region snapshot. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *region-snapshot-id-attrs + + - + name: region-read + doc: Read region data. + attribute-set: devlink + dont-validate: [ dump-strict ] + flags: [ admin-perm ] + dump: + request: + attributes: + - bus-name + - dev-name + - port-index + - region-name + - region-snapshot-id + - region-direct + - region-chunk-addr + - region-chunk-len + reply: + value: 46 + attributes: + - bus-name + - dev-name + - port-index + - region-name + + - + name: port-param-get + doc: Get port param instances. + attribute-set: devlink + dont-validate: [ strict, dump-strict ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs + reply: + attributes: *port-id-attrs + dump: + reply: + attributes: *port-id-attrs + + - + name: port-param-set + doc: Set port param instances.
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port + post: devlink-nl-post-doit + request: + attributes: *port-id-attrs - name: info-get doc: Get device information, like driver name, hardware and firmware versions etc. attribute-set: devlink - dont-validate: - - strict - - dump - + dont-validate: [ strict, dump ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -500,9 +1676,7 @@ operations: name: health-reporter-get doc: Get health reporter instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit-port-optional post: devlink-nl-post-doit @@ -519,15 +1693,97 @@ operations: attributes: *port-id-attrs reply: *health-reporter-get-reply - # TODO: fill in the operations in between + - + name: health-reporter-set + doc: Set health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - port-index + - health-reporter-name + - health-reporter-graceful-period + - health-reporter-auto-recover + - health-reporter-auto-dump + + - + name: health-reporter-recover + doc: Recover health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: health-reporter-diagnose + doc: Diagnose health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: health-reporter-dump-get + doc: Dump health reporter instances. 
+ attribute-set: devlink + dont-validate: [ dump-strict ] + flags: [ admin-perm ] + dump: + request: + attributes: *health-reporter-id-attrs + reply: + value: 56 + attributes: + - fmsg + + - + name: health-reporter-dump-clear + doc: Clear dump of health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + attributes: *health-reporter-id-attrs + + - + name: flash-update + doc: Flash update devlink instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - flash-update-file-name + - flash-update-component + - flash-update-overwrite-mask - name: trap-get doc: Get trap instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -545,15 +1801,27 @@ operations: attributes: *dev-id-attrs reply: *trap-get-reply - # TODO: fill in the operations in between + - + name: trap-set + doc: Set trap instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-name + - trap-action - name: trap-group-get doc: Get trap group instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -571,15 +1839,28 @@ operations: attributes: *dev-id-attrs reply: *trap-group-get-reply - # TODO: fill in the operations in between + - + name: trap-group-set + doc: Set trap group instances. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-group-name + - trap-action + - trap-policer-id - name: trap-policer-get doc: Get trap policer instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -597,15 +1878,41 @@ operations: attributes: *dev-id-attrs reply: *trap-policer-get-reply - # TODO: fill in the operations in between + - + name: trap-policer-set + doc: Set trap policer instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - trap-policer-id + - trap-policer-rate + - trap-policer-burst + + - + name: health-reporter-test + doc: Test health reporter instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit-port-optional + post: devlink-nl-post-doit + request: + value: 73 + attributes: *health-reporter-id-attrs - name: rate-get doc: Get rate instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -624,15 +1931,66 @@ operations: attributes: *dev-id-attrs reply: *rate-get-reply - # TODO: fill in the operations in between + - + name: rate-set + doc: Set rate instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name + - rate-tx-share + - rate-tx-max + - rate-tx-priority + - rate-tx-weight + - rate-parent-node-name + + - + name: rate-new + doc: Create rate instances.
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name + - rate-tx-share + - rate-tx-max + - rate-tx-priority + - rate-tx-weight + - rate-parent-node-name + + - + name: rate-del + doc: Delete rate instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - rate-node-name - name: linecard-get doc: Get line card instances. attribute-set: devlink - dont-validate: - - strict - + dont-validate: [ strict ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -650,16 +2008,27 @@ operations: attributes: *dev-id-attrs reply: *linecard-get-reply - # TODO: fill in the operations in between + - + name: linecard-set + doc: Set line card instances. + attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - linecard-index + - linecard-type - name: selftests-get doc: Get device selftest instances. attribute-set: devlink - dont-validate: - - strict - - dump - + dont-validate: [ strict, dump ] do: pre: devlink-nl-pre-doit post: devlink-nl-post-doit @@ -671,3 +2040,18 @@ operations: attributes: *dev-id-attrs dump: reply: *selftests-get-reply + + - + name: selftests-run + doc: Run device selftest instances. 
+ attribute-set: devlink + dont-validate: [ strict ] + flags: [ admin-perm ] + do: + pre: devlink-nl-pre-doit + post: devlink-nl-post-doit + request: + attributes: + - bus-name + - dev-name + - selftests diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml new file mode 100644 index 0000000000..2b4c4bcd83 --- /dev/null +++ b/Documentation/netlink/specs/dpll.yaml @@ -0,0 +1,506 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: dpll + +doc: DPLL subsystem. + +definitions: + - + type: enum + name: mode + doc: | + working modes a dpll can support, differentiates if and how dpll selects + one of its inputs to syntonize with it, valid values for DPLL_A_MODE + attribute + entries: + - + name: manual + doc: input can be only selected by sending a request to dpll + value: 1 + - + name: automatic + doc: highest prio input pin auto selected by dpll + render-max: true + - + type: enum + name: lock-status + doc: | + provides information of dpll device lock status, valid values for + DPLL_A_LOCK_STATUS attribute + entries: + - + name: unlocked + doc: | + dpll was not yet locked to any valid input (or forced by setting + DPLL_A_MODE to DPLL_MODE_DETACHED) + value: 1 + - + name: locked + doc: | + dpll is locked to a valid signal, but no holdover available + - + name: locked-ho-acq + doc: | + dpll is locked and holdover acquired + - + name: holdover + doc: | + dpll is in holdover state - lost a valid lock or was forced + by disconnecting all the pins (latter possible only + when dpll lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ, + if dpll lock-state was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the + dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED) + render-max: true + - + type: const + name: temp-divider + value: 1000 + doc: | + temperature divider allowing userspace to calculate the + temperature as float with three digit decimal precision. 
+ Value of (DPLL_A_TEMP / DPLL_TEMP_DIVIDER) is integer part of + temperature value. + Value of (DPLL_A_TEMP % DPLL_TEMP_DIVIDER) is fractional part of + temperature value. + - + type: enum + name: type + doc: type of dpll, valid values for DPLL_A_TYPE attribute + entries: + - + name: pps + doc: dpll produces Pulse-Per-Second signal + value: 1 + - + name: eec + doc: dpll drives the Ethernet Equipment Clock + render-max: true + - + type: enum + name: pin-type + doc: | + defines possible types of a pin, valid values for DPLL_A_PIN_TYPE + attribute + entries: + - + name: mux + doc: aggregates another layer of selectable pins + value: 1 + - + name: ext + doc: external input + - + name: synce-eth-port + doc: ethernet port PHY's recovered clock + - + name: int-oscillator + doc: device internal oscillator + - + name: gnss + doc: GNSS recovered clock + render-max: true + - + type: enum + name: pin-direction + doc: | + defines possible direction of a pin, valid values for + DPLL_A_PIN_DIRECTION attribute + entries: + - + name: input + doc: pin used as a input of a signal + value: 1 + - + name: output + doc: pin used to output the signal + render-max: true + - + type: const + name: pin-frequency-1-hz + value: 1 + - + type: const + name: pin-frequency-10-khz + value: 10000 + - + type: const + name: pin-frequency-77_5-khz + value: 77500 + - + type: const + name: pin-frequency-10-mhz + value: 10000000 + - + type: enum + name: pin-state + doc: | + defines possible states of a pin, valid values for + DPLL_A_PIN_STATE attribute + entries: + - + name: connected + doc: pin connected, active input of phase locked loop + value: 1 + - + name: disconnected + doc: pin disconnected, not considered as a valid input + - + name: selectable + doc: pin enabled for automatic input selection + render-max: true + - + type: flags + name: pin-capabilities + doc: | + defines possible capabilities of a pin, valid flags on + DPLL_A_PIN_CAPABILITIES attribute + entries: + - + name: direction-can-change 
+ doc: pin direction can be changed + - + name: priority-can-change + doc: pin priority can be changed + - + name: state-can-change + doc: pin state can be changed + - + type: const + name: phase-offset-divider + value: 1000 + doc: | + phase offset divider allows userspace to calculate a value of + measured signal phase difference between a pin and dpll device + as a fractional value with three digit decimal precision. + Value of (DPLL_A_PHASE_OFFSET / DPLL_PHASE_OFFSET_DIVIDER) is an + integer part of a measured phase offset value. + Value of (DPLL_A_PHASE_OFFSET % DPLL_PHASE_OFFSET_DIVIDER) is a + fractional part of a measured phase offset value. + +attribute-sets: + - + name: dpll + enum-name: dpll_a + attributes: + - + name: id + type: u32 + - + name: module-name + type: string + - + name: pad + type: pad + - + name: clock-id + type: u64 + - + name: mode + type: u32 + enum: mode + - + name: mode-supported + type: u32 + enum: mode + multi-attr: true + - + name: lock-status + type: u32 + enum: lock-status + - + name: temp + type: s32 + - + name: type + type: u32 + enum: type + - + name: pin + enum-name: dpll_a_pin + attributes: + - + name: id + type: u32 + - + name: parent-id + type: u32 + - + name: module-name + type: string + - + name: pad + type: pad + - + name: clock-id + type: u64 + - + name: board-label + type: string + - + name: panel-label + type: string + - + name: package-label + type: string + - + name: type + type: u32 + enum: pin-type + - + name: direction + type: u32 + enum: pin-direction + - + name: frequency + type: u64 + - + name: frequency-supported + type: nest + multi-attr: true + nested-attributes: frequency-range + - + name: frequency-min + type: u64 + - + name: frequency-max + type: u64 + - + name: prio + type: u32 + - + name: state + type: u32 + enum: pin-state + - + name: capabilities + type: u32 + - + name: parent-device + type: nest + multi-attr: true + nested-attributes: pin-parent-device + - + name: parent-pin + type: nest + 
multi-attr: true + nested-attributes: pin-parent-pin + - + name: phase-adjust-min + type: s32 + - + name: phase-adjust-max + type: s32 + - + name: phase-adjust + type: s32 + - + name: phase-offset + type: s64 + - + name: pin-parent-device + subset-of: pin + attributes: + - + name: parent-id + - + name: direction + - + name: prio + - + name: state + - + name: phase-offset + - + name: pin-parent-pin + subset-of: pin + attributes: + - + name: parent-id + - + name: state + - + name: frequency-range + subset-of: pin + attributes: + - + name: frequency-min + - + name: frequency-max + +operations: + enum-name: dpll_cmd + list: + - + name: device-id-get + doc: | + Get id of dpll device that matches given attributes + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-lock-doit + post: dpll-unlock-doit + request: + attributes: + - module-name + - clock-id + - type + reply: + attributes: + - id + + - + name: device-get + doc: | + Get list of DPLL devices (dump) or attributes of a single dpll device + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-pre-doit + post: dpll-post-doit + request: + attributes: + - id + reply: &dev-attrs + attributes: + - id + - module-name + - mode + - mode-supported + - lock-status + - temp + - clock-id + - type + + dump: + reply: *dev-attrs + + - + name: device-set + doc: Set attributes for a DPLL device + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-pre-doit + post: dpll-post-doit + request: + attributes: + - id + - + name: device-create-ntf + doc: Notification about device appearing + notify: device-get + mcgrp: monitor + - + name: device-delete-ntf + doc: Notification about device disappearing + notify: device-get + mcgrp: monitor + - + name: device-change-ntf + doc: Notification about device configuration being changed + notify: device-get + mcgrp: monitor + - + name: pin-id-get + doc: | + Get id of a pin that matches given attributes + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: 
dpll-lock-doit + post: dpll-unlock-doit + request: + attributes: + - module-name + - clock-id + - board-label + - panel-label + - package-label + - type + reply: + attributes: + - id + + - + name: pin-get + doc: | + Get list of pins and its attributes. + - dump request without any attributes given - list all the pins in the + system + - dump request with target dpll - list all the pins registered with + a given dpll device + - do request with target dpll and target pin - single pin attributes + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: dpll-pin-pre-doit + post: dpll-pin-post-doit + request: + attributes: + - id + reply: &pin-attrs + attributes: + - id + - board-label + - panel-label + - package-label + - type + - frequency + - frequency-supported + - capabilities + - parent-device + - parent-pin + - phase-adjust-min + - phase-adjust-max + - phase-adjust + + dump: + request: + attributes: + - id + reply: *pin-attrs + + - + name: pin-set + doc: Set attributes of a target pin + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: dpll-pin-pre-doit + post: dpll-pin-post-doit + request: + attributes: + - id + - frequency + - direction + - prio + - state + - parent-device + - parent-pin + - phase-adjust + - + name: pin-create-ntf + doc: Notification about pin appearing + notify: pin-get + mcgrp: monitor + - + name: pin-delete-ntf + doc: Notification about pin disappearing + notify: pin-get + mcgrp: monitor + - + name: pin-change-ntf + doc: Notification about pin configuration being changed + notify: pin-get + mcgrp: monitor + +mcast-groups: + list: + - + name: monitor diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 837b565577..5c7a65b009 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -818,13 +818,10 @@ attribute-sets: attributes: - name: hist-bkt-low - type: u32 - name: hist-bkt-hi - type: u32 - name: hist-val - type: u64 - name: stats 
attributes: diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index 6d89e30f5f..b934cc513e 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -34,16 +34,16 @@ attribute-sets: attributes: - name: cert - type: u32 + type: s32 - name: privkey - type: u32 + type: s32 - name: accept attributes: - name: sockfd - type: u32 + type: s32 - name: handler-class type: u32 @@ -79,7 +79,7 @@ attribute-sets: type: u32 - name: sockfd - type: u32 + type: s32 - name: remote-auth type: u32 diff --git a/Documentation/netlink/specs/mptcp.yaml b/Documentation/netlink/specs/mptcp.yaml new file mode 100644 index 0000000000..49f90cfb46 --- /dev/null +++ b/Documentation/netlink/specs/mptcp.yaml @@ -0,0 +1,393 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: mptcp_pm +protocol: genetlink-legacy +doc: Multipath TCP. + +c-family-name: mptcp-pm-name +c-version-name: mptcp-pm-ver +max-by-define: true +kernel-policy: per-op +cmd-cnt-name: --mptcp-pm-cmd-after-last + +definitions: + - + type: enum + name: event-type + enum-name: mptcp-event-type + name-prefix: mptcp-event- + entries: + - + name: unspec + doc: unused event + - + name: created + doc: + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + A new MPTCP connection has been created. It is the good time to + allocate memory and send ADD_ADDR if needed. Depending on the + traffic-patterns it can take a long time until the + MPTCP_EVENT_ESTABLISHED is sent. + - + name: established + doc: + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + A MPTCP connection is established (can start new subflows). + - + name: closed + doc: + token + A MPTCP connection has stopped. + - + name: announced + value: 6 + doc: + token, rem_id, family, daddr4 | daddr6 [, dport] + A new address has been announced by the peer. 
+ - + name: removed + doc: + token, rem_id + An address has been lost by the peer. + - + name: sub-established + value: 10 + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + A new subflow has been established. 'error' should not be set. + - + name: sub-closed + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + A subflow has been closed. An error (copy of sk_err) could be set if an + error has been detected for this subflow. + - + name: sub-priority + value: 13 + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + The priority of a subflow has changed. 'error' should not be set. + - + name: listener-created + value: 15 + doc: + family, sport, saddr4 | saddr6 + A new PM listener is created. + - + name: listener-closed + doc: + family, sport, saddr4 | saddr6 + A PM listener is closed. + +attribute-sets: + - + name: address + name-prefix: mptcp-pm-addr-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: family + type: u16 + - + name: id + type: u8 + - + name: addr4 + type: u32 + byte-order: big-endian + - + name: addr6 + type: binary + checks: + exact-len: 16 + - + name: port + type: u16 + byte-order: big-endian + - + name: flags + type: u32 + - + name: if-idx + type: s32 + - + name: subflow-attribute + name-prefix: mptcp-subflow-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: token-rem + type: u32 + - + name: token-loc + type: u32 + - + name: relwrite-seq + type: u32 + - + name: map-seq + type: u64 + - + name: map-sfseq + type: u32 + - + name: ssn-offset + type: u32 + - + name: map-datalen + type: u16 + - + name: flags + type: u32 + - + name: id-rem + type: u8 + - + name: id-loc + type: u8 + - + name: pad + type: pad + - + name: endpoint + name-prefix: mptcp-pm-endpoint- + attributes: + - + name: addr + type: nest + 
nested-attributes: address + - + name: attr + name-prefix: mptcp-pm-attr- + attr-cnt-name: --mptcp-attr-after-last + attributes: + - + name: unspec + type: unused + value: 0 + - + name: addr + type: nest + nested-attributes: address + - + name: rcv-add-addrs + type: u32 + - + name: subflows + type: u32 + - + name: token + type: u32 + - + name: loc-id + type: u8 + - + name: addr-remote + type: nest + nested-attributes: address + - + name: event-attr + enum-name: mptcp-event-attr + name-prefix: mptcp-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: token + type: u32 + - + name: family + type: u16 + - + name: loc-id + type: u8 + - + name: rem-id + type: u8 + - + name: saddr4 + type: u32 + byte-order: big-endian + - + name: saddr6 + type: binary + checks: + min-len: 16 + - + name: daddr4 + type: u32 + byte-order: big-endian + - + name: daddr6 + type: binary + checks: + min-len: 16 + - + name: sport + type: u16 + byte-order: big-endian + - + name: dport + type: u16 + byte-order: big-endian + - + name: backup + type: u8 + - + name: error + type: u8 + - + name: flags + type: u16 + - + name: timeout + type: u32 + - + name: if_idx + type: u32 + - + name: reset-reason + type: u32 + - + name: reset-flags + type: u32 + - + name: server-side + type: u8 + +operations: + list: + - + name: unspec + doc: unused + value: 0 + - + name: add-addr + doc: Add endpoint + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &add-addr-attrs + request: + attributes: + - addr + - + name: del-addr + doc: Delete endpoint + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *add-addr-attrs + - + name: get-addr + doc: Get endpoint information + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &get-addr-attrs + request: + attributes: + - addr + reply: + attributes: + - addr + dump: + reply: + attributes: + - addr + - + name: flush-addrs + doc: flush addresses + 
attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *add-addr-attrs + - + name: set-limits + doc: Set protocol limits + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &mptcp-limits + request: + attributes: + - rcv-add-addrs + - subflows + - + name: get-limits + doc: Get protocol limits + attribute-set: attr + dont-validate: [ strict ] + do: &mptcp-get-limits + request: + attributes: + - rcv-add-addrs + - subflows + reply: + attributes: + - rcv-add-addrs + - subflows + - + name: set-flags + doc: Change endpoint flags + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &mptcp-set-flags + request: + attributes: + - addr + - token + - addr-remote + - + name: announce + doc: announce new sf + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &announce-add + request: + attributes: + - addr + - token + - + name: remove + doc: announce removal + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: + request: + attributes: + - token + - loc-id + - + name: subflow-create + doc: todo + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &sf-create + request: + attributes: + - addr + - token + - addr-remote + - + name: subflow-destroy + doc: todo + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *sf-create diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 1c7284fd53..14511b13f3 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -42,6 +42,19 @@ definitions: doc: This feature informs if netdev implements non-linear XDP buffer support in ndo_xdp_xmit callback. + - + type: flags + name: xdp-rx-metadata + render-max: true + entries: + - + name: timestamp + doc: + Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp(). 
+ - + name: hash + doc: + Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). attribute-sets: - @@ -61,13 +74,18 @@ attribute-sets: doc: Bitmask of enabled xdp-features. type: u64 enum: xdp-act - enum-as-flags: true - name: xdp-zc-max-segs doc: max fragment count supported by ZC driver type: u32 checks: min: 1 + - + name: xdp-rx-metadata-features + doc: Bitmask of supported XDP receive metadata features. + See Documentation/networking/xdp-rx-metadata.rst for more details. + type: u64 + enum: xdp-rx-metadata operations: list: @@ -84,6 +102,7 @@ operations: - ifindex - xdp-features - xdp-zc-max-segs + - xdp-rx-metadata-features dump: reply: *dev-all - diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml new file mode 100644 index 0000000000..05acc73e2e --- /dev/null +++ b/Documentation/netlink/specs/nfsd.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: nfsd +protocol: genetlink +uapi-header: linux/nfsd_netlink.h + +doc: NFSD configuration over generic netlink. 
+ +attribute-sets: + - + name: rpc-status + attributes: + - + name: xid + type: u32 + byte-order: big-endian + - + name: flags + type: u32 + - + name: prog + type: u32 + - + name: version + type: u8 + - + name: proc + type: u32 + - + name: service_time + type: s64 + - + name: pad + type: pad + - + name: saddr4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: daddr4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: saddr6 + type: binary + display-hint: ipv6 + - + name: daddr6 + type: binary + display-hint: ipv6 + - + name: sport + type: u16 + byte-order: big-endian + - + name: dport + type: u16 + byte-order: big-endian + - + name: compound-ops + type: u32 + multi-attr: true + +operations: + list: + - + name: rpc-status-get + doc: dump pending nfsd rpc + attribute-set: rpc-status + dump: + pre: nfsd-nl-rpc-status-get-start + post: nfsd-nl-rpc-status-get-done + reply: + attributes: + - xid + - flags + - prog + - version + - proc + - service_time + - saddr4 + - daddr4 + - saddr6 + - daddr6 + - sport + - dport + - compound-ops diff --git a/Documentation/networking/device_drivers/appletalk/cops.rst b/Documentation/networking/device_drivers/appletalk/cops.rst deleted file mode 100644 index 964ba80599..0000000000 --- a/Documentation/networking/device_drivers/appletalk/cops.rst +++ /dev/null @@ -1,80 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -======================================== -The COPS LocalTalk Linux driver (cops.c) -======================================== - -By Jay Schulist - -This driver has two modes and they are: Dayna mode and Tangent mode. -Each mode corresponds with the type of card. It has been found -that there are 2 main types of cards and all other cards are -the same and just have different names or only have minor differences -such as more IO ports. As this driver is tested it will -become more clear exactly what cards are supported. - -Right now these cards are known to work with the COPS driver. 
The -LT-200 cards work in a somewhat more limited capacity than the -DL200 cards, which work very well and are in use by many people. - -TANGENT driver mode: - - Tangent ATB-II, Novell NL-1000, Daystar Digital LT-200 - -DAYNA driver mode: - - Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95, - - Farallon PhoneNET PC III, Farallon PhoneNET PC II - -Other cards possibly supported mode unknown though: - - Dayna DL2000 (Full length) - -The COPS driver defaults to using Dayna mode. To change the driver's -mode if you built a driver with dual support use board_type=1 or -board_type=2 for Dayna or Tangent with insmod. - -Operation/loading of the driver -=============================== - -Use modprobe like this: /sbin/modprobe cops.o (IO #) (IRQ #) -If you do not specify any options the driver will try and use the IO = 0x240, -IRQ = 5. As of right now I would only use IRQ 5 for the card, if autoprobing. - -To load multiple COPS driver Localtalk cards you can do one of the following:: - - insmod cops io=0x240 irq=5 - insmod -o cops2 cops io=0x260 irq=3 - -Or in lilo.conf put something like this:: - - append="ether=5,0x240,lt0 ether=3,0x260,lt1" - -Then bring up the interface with ifconfig. It will look something like this:: - - lt0 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-F7-00-00-00-00-00-00-00-00 - inet addr:192.168.1.2 Bcast:192.168.1.255 Mask:255.255.255.0 - UP BROADCAST RUNNING NOARP MULTICAST MTU:600 Metric:1 - RX packets:0 errors:0 dropped:0 overruns:0 frame:0 - TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 coll:0 - -Netatalk Configuration -====================== - -You will need to configure atalkd with something like the following to make -it work with the cops.c driver. 
- -* For single LTalk card use:: - - dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033" - lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" - -* For multiple cards, Ethernet and LocalTalk:: - - eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033" - lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033" - -* For multiple LocalTalk cards, and an Ethernet card. - -* Order seems to matter here, Ethernet last:: - - lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1" - lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2" - eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk" diff --git a/Documentation/networking/device_drivers/appletalk/index.rst b/Documentation/networking/device_drivers/appletalk/index.rst deleted file mode 100644 index c196baeb08..0000000000 --- a/Documentation/networking/device_drivers/appletalk/index.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - -AppleTalk Device Drivers -======================== - -Contents: - -.. toctree:: - :maxdepth: 2 - - cops - -.. only:: subproject and html - - Indices - ======= - - * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 9827e81608..43de285b8a 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -32,6 +32,7 @@ Contents: intel/e1000 intel/e1000e intel/fm10k + intel/idpf intel/igb intel/igbvf intel/ixgbe diff --git a/Documentation/networking/device_drivers/ethernet/intel/idpf.rst b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst new file mode 100644 index 0000000000..adb16e2abd --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst @@ -0,0 +1,160 @@ +.. 
SPDX-License-Identifier: GPL-2.0+ + +========================================================================== +idpf Linux* Base Driver for the Intel(R) Infrastructure Data Path Function +========================================================================== + +Intel idpf Linux driver. +Copyright(C) 2023 Intel Corporation. + +.. contents:: + +The idpf driver serves as both the Physical Function (PF) and Virtual Function +(VF) driver for the Intel(R) Infrastructure Data Path Function. + +Driver information can be obtained using ethtool, lspci, and ip. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. All hardware requirements listed apply to use +with Linux. + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. If you don't have one yet, you can +obtain it at: +https://kernel.org/pub/software/network/ethtool/ + + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + # dmesg -n 8 + +.. note:: + This setting is not saved across reboots. + + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ip command to increase the MTU size. 
For example, enter the following +where <ethX> is the interface number:: + + # ip link set mtu 9000 dev <ethX> + # ip link set up dev <ethX> + +.. note:: + The maximum MTU setting for jumbo frames is 9706. This corresponds to the + maximum jumbo frame size of 9728 bytes. + +.. note:: + This driver will attempt to use multiple page sized buffers to receive + each jumbo packet. This should help to avoid buffer starvation issues when + allocating receive packets. + +.. note:: + Packet loss may have a greater impact on throughput when you use jumbo + frames. If you observe a drop in performance after enabling jumbo frames, + enabling flow control may mitigate the issue. + + +Performance Optimization +======================== +Driver defaults are meant to fit a wide variety of workloads, but if further +optimization is required, we recommend experimenting with the following +settings. + + +Interrupt Rate Limiting +----------------------- +This driver supports an adaptive interrupt throttle rate (ITR) mechanism that +is tuned for general workloads. The user can customize the interrupt rate +control for specific workloads, via ethtool, adjusting the number of +microseconds between interrupts. + +To set the interrupt rate manually, you must disable adaptive mode:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off + +For lower CPU utilization: + - Disable adaptive ITR and lower Rx and Tx interrupts. The examples below + affect every queue of the specified interface. + + - Setting rx-usecs and tx-usecs to 80 will limit interrupts to about + 12,500 interrupts per second per queue:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80 + tx-usecs 80 + +For reduced latency: + - Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0 + using ethtool:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0 + tx-usecs 0 + +Per-queue interrupt rate settings: + - The following examples are for queues 1 and 3, but you can adjust other + queues.
+ + - To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or + about 100,000 interrupts/second, for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off + rx-usecs 10 + + - To show the current coalesce settings for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce + + + +Virtualized Environments +------------------------ +In addition to the other suggestions in this section, the following may be +helpful to optimize performance in VMs. + + - Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to + individual LCPUs, making sure to use a set of CPUs included in the + device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist. + + - Configure as many Rx/Tx queues in the VM as available. (See the idpf driver + documentation for the number of queues supported.) For example:: + + # ethtool -L <virt_interface> rx <max> tx <max> + + +Support +======= +For general information, go to the Intel support website at: +http://www.intel.com/support/ + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to intel-wired-lan@lists.osuosl.org. + + +Trademarks +========== +Intel is a trademark or registered trademark of Intel Corporation or its +subsidiaries in the United States and/or other countries. + +* Other names and brands may be claimed as the property of others. diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst index 0a42c3395f..20d3b7e870 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst @@ -67,7 +67,7 @@ Enabling the driver and kconfig options | Enables :ref:`IPSec XFRM cryptography-offload acceleration `.
-**CONFIG_MLX5_EN_MACSEC=(y/n)** +**CONFIG_MLX5_MACSEC=(y/n)** | Build support for MACsec cryptography-offload acceleration in the NIC. diff --git a/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst b/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst index c5673ec455..d731b5a985 100644 --- a/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst +++ b/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst @@ -64,8 +64,8 @@ c. Multi-buffer receive mode. Scattering of packet across multiple IBM xSeries). d. MSI/MSI-X. Can be enabled on platforms which support this feature - (IA64, Xeon) resulting in noticeable performance improvement(up to 7% - on certain platforms). + resulting in noticeable performance improvement (up to 7% on certain + platforms). e. Statistics. Comprehensive MAC-level and software statistics displayed using "ethtool -S" option. diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst index 601eacaf12..0dd30a84ce 100644 --- a/Documentation/networking/device_drivers/index.rst +++ b/Documentation/networking/device_drivers/index.rst @@ -8,7 +8,6 @@ Contents: .. toctree:: :maxdepth: 2 - appletalk/index atm/index cable/index can/index @@ -16,7 +15,6 @@ Contents: ethernet/index fddi/index hamradio/index - qlogic/index wifi/index wwan/index diff --git a/Documentation/networking/device_drivers/qlogic/index.rst b/Documentation/networking/device_drivers/qlogic/index.rst deleted file mode 100644 index ad05b04286..0000000000 --- a/Documentation/networking/device_drivers/qlogic/index.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) - -QLogic QLGE Device Drivers -=============================================== - -Contents: - -.. toctree:: - :maxdepth: 2 - - qlge - -.. 
only:: subproject and html - - Indices - ======= - - * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/qlogic/qlge.rst b/Documentation/networking/device_drivers/qlogic/qlge.rst deleted file mode 100644 index 0b888253d1..0000000000 --- a/Documentation/networking/device_drivers/qlogic/qlge.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -======================================= -QLogic QLGE 10Gb Ethernet device driver -======================================= - -This driver use drgn and devlink for debugging. - -Dump kernel data structures in drgn ------------------------------------ - -To dump kernel data structures, the following Python script can be used -in drgn: - -.. code-block:: python - - def align(x, a): - """the alignment a should be a power of 2 - """ - mask = a - 1 - return (x+ mask) & ~mask - - def struct_size(struct_type): - struct_str = "struct {}".format(struct_type) - return sizeof(Object(prog, struct_str, address=0x0)) - - def netdev_priv(netdevice): - NETDEV_ALIGN = 32 - return netdevice.value_() + align(struct_size("net_device"), NETDEV_ALIGN) - - name = 'xxx' - qlge_device = None - netdevices = prog['init_net'].dev_base_head.address_of_() - for netdevice in list_for_each_entry("struct net_device", netdevices, "dev_list"): - if netdevice.name.string_().decode('ascii') == name: - print(netdevice.name) - - ql_adapter = Object(prog, "struct ql_adapter", address=netdev_priv(qlge_device)) - -The struct ql_adapter will be printed in drgn as follows, - - >>> ql_adapter - (struct ql_adapter){ - .ricb = (struct ricb){ - .base_cq = (u8)0, - .flags = (u8)120, - .mask = (__le16)26637, - .hash_cq_id = (u8 [1024]){ 172, 142, 255, 255 }, - .ipv6_hash_key = (__le32 [10]){}, - .ipv4_hash_key = (__le32 [4]){}, - }, - .flags = (unsigned long)0, - .wol = (u32)0, - .nic_stats = (struct nic_stats){ - .tx_pkts = (u64)0, - .tx_bytes = (u64)0, - .tx_mcast_pkts = (u64)0, - .tx_bcast_pkts = (u64)0, - .tx_ucast_pkts = (u64)0, - 
.tx_ctl_pkts = (u64)0, - .tx_pause_pkts = (u64)0, - ... - }, - .active_vlans = (unsigned long [64]){ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52780853100545, 18446744073709551615, - 18446619461681283072, 0, 42949673024, 2147483647, - }, - .rx_ring = (struct rx_ring [17]){ - { - .cqicb = (struct cqicb){ - .msix_vect = (u8)0, - .reserved1 = (u8)0, - .reserved2 = (u8)0, - .flags = (u8)0, - .len = (__le16)0, - .rid = (__le16)0, - ... - }, - .cq_base = (void *)0x0, - .cq_base_dma = (dma_addr_t)0, - } - ... - } - } - -coredump via devlink --------------------- - - -And the coredump obtained via devlink in json format looks like, - -.. code:: shell - - $ devlink health dump show DEVICE reporter coredump -p -j - { - "Core Registers": { - "segment": 1, - "values": [ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ] - }, - "Test Logic Regs": { - "segment": 2, - "values": [ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ] - }, - "RMII Registers": { - "segment": 3, - "values": [ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ] - }, - ... - "Sem Registers": { - "segment": 50, - "values": [ 0,0,0,0 ] - } - } - -When the module parameter qlge_force_coredump is set to be true, the MPI -RISC reset before coredumping. So coredumping will much longer since -devlink tool has to wait for 5 secs for the resetting to be -finished. 
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index e33ad2401a..562f46b412 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -126,7 +126,7 @@ Users may also set the RoCE capability of the function using `devlink port function set roce` command. Users may also set the function as migratable using -'devlink port function set migratable' command. +`devlink port function set migratable` command. Users may also set the IPsec crypto capability of the function using `devlink port function set ipsec_crypto` command. diff --git a/Documentation/networking/devlink/i40e.rst b/Documentation/networking/devlink/i40e.rst new file mode 100644 index 0000000000..d3cb5bb519 --- /dev/null +++ b/Documentation/networking/devlink/i40e.rst @@ -0,0 +1,59 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +i40e devlink support +==================== + +This document describes the devlink features implemented by the ``i40e`` +device driver. + +Info versions +============= + +The ``i40e`` driver reports the following versions + +.. list-table:: devlink info versions implemented + :widths: 5 5 5 90 + + * - Name + - Type + - Example + - Description + * - ``board.id`` + - fixed + - K15190-000 + - The Product Board Assembly (PBA) identifier of the board. + * - ``fw.mgmt`` + - running + - 9.130 + - 2-digit version number of the management firmware that controls the + PHY, link, etc. + * - ``fw.mgmt.api`` + - running + - 1.15 + - 2-digit version number of the API exported over the AdminQ by the + management firmware. Used by the driver to identify what commands + are supported. + * - ``fw.mgmt.build`` + - running + - 73618 + - Build number of the source for the management firmware. + * - ``fw.undi`` + - running + - 1.3429.0 + - Version of the Option ROM containing the UEFI driver. The version is + reported in ``major.minor.patch`` format. 
The major version is + incremented whenever a major breaking change occurs, or when the + minor version would overflow. The minor version is incremented for + non-breaking changes and reset to 1 when the major version is + incremented. The patch version is normally 0 but is incremented when + a fix is delivered as a patch against an older base Option ROM. + * - ``fw.psid.api`` + - running + - 9.30 + - Version defining the format of the flash contents. + * - ``fw.bundle_id`` + - running + - 0x8000e5f3 + - Unique identifier of the firmware image file that was loaded onto + the device. Also referred to as the EETRACK identifier of the NVM. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index b49749e2b9..e14d7a701b 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -18,6 +18,34 @@ netlink commands. Drivers are encouraged to use the devlink instance lock for their own needs. +Drivers need to be cautious when taking devlink instance lock and +taking RTNL lock at the same time. Devlink instance lock needs to be taken +first, only after that RTNL lock could be taken. + +Nested instances +---------------- + +Some objects, like linecards or port functions, could have another +devlink instances created underneath. In that case, drivers should make +sure to respect following rules: + + - Lock ordering should be maintained. If driver needs to take instance + lock of both nested and parent instances at the same time, devlink + instance lock of the parent instance should be taken first, only then + instance lock of the nested instance could be taken. + - Driver should use object-specific helpers to setup the + nested relationship: + + - ``devl_nested_devlink_set()`` - called to setup devlink -> nested + devlink relationship (could be used for multiple nested instances). + - ``devl_port_fn_devlink_set()`` - called to setup port function -> + nested devlink relationship.
+ - ``devlink_linecard_nested_dl_set()`` - called to setup linecard -> + nested devlink relationship. + +The nested devlink info is exposed to the userspace over object-specific +attributes of devlink netlink. + Interface documentation ----------------------- @@ -52,6 +80,7 @@ parameters, info versions, and other features it supports. bnxt etas_es58x hns3 + i40e ionic ice mlx4 diff --git a/Documentation/networking/dsa/b53.rst b/Documentation/networking/dsa/b53.rst index b41637cdb8..1cb3ff648f 100644 --- a/Documentation/networking/dsa/b53.rst +++ b/Documentation/networking/dsa/b53.rst @@ -52,7 +52,7 @@ VLAN programming would basically change the CPU port's default PVID and make it untagged, undesirable. In difference to the configuration described in :ref:`dsa-vlan-configuration` -the default VLAN 1 has to be removed from the slave interface configuration in +the default VLAN 1 has to be removed from the user interface configuration in single port and gateway configuration, while there is no need to add an extra VLAN configuration in the bridge showcase. @@ -68,13 +68,13 @@ By default packages are tagged with vid 1: ip link add link eth0 name eth0.2 type vlan id 2 ip link add link eth0 name eth0.3 type vlan id 3 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up ip link set eth0.3 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -113,11 +113,11 @@ bridge # tag traffic on CPU port ip link add link eth0 name eth0.1 type vlan id 1 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. 
ip link set eth0 up ip link set eth0.1 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -149,12 +149,12 @@ gateway ip link add link eth0 name eth0.1 type vlan id 1 ip link add link eth0 name eth0.2 type vlan id 2 - # The master interface needs to be brought up before the slave ports. + # The conduit interface needs to be brought up before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up diff --git a/Documentation/networking/dsa/bcm_sf2.rst b/Documentation/networking/dsa/bcm_sf2.rst index dee234039e..d257143569 100644 --- a/Documentation/networking/dsa/bcm_sf2.rst +++ b/Documentation/networking/dsa/bcm_sf2.rst @@ -67,7 +67,7 @@ MDIO indirect accesses ---------------------- Due to a limitation in how Broadcom switches have been designed, external -Broadcom switches connected to a SF2 require the use of the DSA slave MDIO bus +Broadcom switches connected to a SF2 require the use of the DSA user MDIO bus in order to properly configure them. By default, the SF2 pseudo-PHY address, and an external switch pseudo-PHY address will both be snooping for incoming MDIO transactions, since they are at the same address (30), resulting in some kind of diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst index d2934c40f0..6cc4ded3cc 100644 --- a/Documentation/networking/dsa/configuration.rst +++ b/Documentation/networking/dsa/configuration.rst @@ -31,38 +31,38 @@ at https://www.kernel.org/pub/linux/utils/net/iproute2/ Through DSA every port of a switch is handled like a normal linux Ethernet interface. The CPU port is the switch port connected to an Ethernet MAC chip. -The corresponding linux Ethernet interface is called the master interface. 
-All other corresponding linux interfaces are called slave interfaces. +The corresponding linux Ethernet interface is called the conduit interface. +All other corresponding linux interfaces are called user interfaces. -The slave interfaces depend on the master interface being up in order for them -to send or receive traffic. Prior to kernel v5.12, the state of the master +The user interfaces depend on the conduit interface being up in order for them +to send or receive traffic. Prior to kernel v5.12, the state of the conduit interface had to be managed explicitly by the user. Starting with kernel v5.12, the behavior is as follows: -- when a DSA slave interface is brought up, the master interface is +- when a DSA user interface is brought up, the conduit interface is automatically brought up. -- when the master interface is brought down, all DSA slave interfaces are +- when the conduit interface is brought down, all DSA user interfaces are automatically brought down. In this documentation the following Ethernet interfaces are used: *eth0* - the master interface + the conduit interface *eth1* - another master interface + another conduit interface *lan1* - a slave interface + a user interface *lan2* - another slave interface + another user interface *lan3* - a third slave interface + a third user interface *wan* - A slave interface dedicated for upstream traffic + A user interface dedicated for upstream traffic Further Ethernet interfaces can be configured similar. The configured IPs and networks are: @@ -96,11 +96,11 @@ without using a VLAN based configuration. ip addr add 192.0.2.5/30 dev lan2 ip addr add 192.0.2.9/30 dev lan3 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. 
ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -108,11 +108,11 @@ without using a VLAN based configuration. *bridge* .. code-block:: sh - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -134,11 +134,11 @@ without using a VLAN based configuration. *gateway* .. code-block:: sh - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -178,14 +178,14 @@ configuration. ip link add link eth0 name eth0.2 type vlan id 2 ip link add link eth0 name eth0.3 type vlan id 3 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up ip link set eth0.3 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -221,12 +221,12 @@ configuration. # tag traffic on CPU port ip link add link eth0 name eth0.1 type vlan id 1 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. 
+ # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up - # bring up the slave interfaces + # bring up the user interfaces ip link set lan1 up ip link set lan2 up ip link set lan3 up @@ -261,13 +261,13 @@ configuration. ip link add link eth0 name eth0.1 type vlan id 1 ip link add link eth0 name eth0.2 type vlan id 2 - # For kernels earlier than v5.12, the master interface needs to be - # brought up manually before the slave ports. + # For kernels earlier than v5.12, the conduit interface needs to be + # brought up manually before the user ports. ip link set eth0 up ip link set eth0.1 up ip link set eth0.2 up - # bring up the slave interfaces + # bring up the user interfaces ip link set wan up ip link set lan1 up ip link set lan2 up @@ -380,22 +380,22 @@ affinities according to the available CPU ports. Secondly, it is possible to perform load balancing between CPU ports on a per packet basis, rather than statically assigning user ports to CPU ports. -This can be achieved by placing the DSA masters under a LAG interface (bonding +This can be achieved by placing the DSA conduits under a LAG interface (bonding or team). DSA monitors this operation and creates a mirror of this software LAG -on the CPU ports facing the physical DSA masters that constitute the LAG slave +on the CPU ports facing the physical DSA conduits that constitute the LAG slave devices. To make use of multiple CPU ports, the firmware (device tree) description of -the switch must mark all the links between CPU ports and their DSA masters +the switch must mark all the links between CPU ports and their DSA conduits using the ``ethernet`` reference/phandle. At startup, only a single CPU port -and DSA master will be used - the numerically first port from the firmware +and DSA conduit will be used - the numerically first port from the firmware description which has an ``ethernet`` property. 
It is up to the user to -configure the system for the switch to use other masters. +configure the system for the switch to use other conduits. DSA uses the ``rtnl_link_ops`` mechanism (with a "dsa" ``kind``) to allow -changing the DSA master of a user port. The ``IFLA_DSA_MASTER`` u32 netlink -attribute contains the ifindex of the master device that handles each slave -device. The DSA master must be a valid candidate based on firmware node +changing the DSA conduit of a user port. The ``IFLA_DSA_CONDUIT`` u32 netlink +attribute contains the ifindex of the conduit device that handles each user +device. The DSA conduit must be a valid candidate based on firmware node information, or a LAG interface which contains only slaves which are valid candidates. @@ -403,7 +403,7 @@ Using iproute2, the following manipulations are possible: .. code-block:: sh - # See the DSA master in current use + # See the DSA conduit in current use ip -d link show dev swp0 (...) dsa master eth0 @@ -414,7 +414,7 @@ Using iproute2, the following manipulations are possible: ip link set swp2 type dsa master eth1 ip link set swp3 type dsa master eth0 - # CPU ports in LAG, using explicit assignment of the DSA master + # CPU ports in LAG, using explicit assignment of the DSA conduit ip link add bond0 type bond mode balance-xor && ip link set bond0 up ip link set eth1 down && ip link set eth1 master bond0 ip link set swp0 type dsa master bond0 @@ -426,7 +426,7 @@ Using iproute2, the following manipulations are possible: (...) 
dsa master bond0 - # CPU ports in LAG, relying on implicit migration of the DSA master + # CPU ports in LAG, relying on implicit migration of the DSA conduit ip link add bond0 type bond mode balance-xor && ip link set bond0 up ip link set eth0 down && ip link set eth0 master bond0 ip link set eth1 down && ip link set eth1 master bond0 @@ -435,24 +435,24 @@ Using iproute2, the following manipulations are possible: dsa master bond0 Notice that in the case of CPU ports under a LAG, the use of the -``IFLA_DSA_MASTER`` netlink attribute is not strictly needed, but rather, DSA -reacts to the ``IFLA_MASTER`` attribute change of its present master (``eth0``) +``IFLA_DSA_CONDUIT`` netlink attribute is not strictly needed, but rather, DSA +reacts to the ``IFLA_MASTER`` attribute change of its present conduit (``eth0``) and migrates all user ports to the new upper of ``eth0``, ``bond0``. Similarly, when ``bond0`` is destroyed using ``RTM_DELLINK``, DSA migrates the user ports -that were assigned to this interface to the first physical DSA master which is +that were assigned to this interface to the first physical DSA conduit which is eligible, based on the firmware description (it effectively reverts to the startup configuration). In a setup with more than 2 physical CPU ports, it is therefore possible to mix -static user to CPU port assignment with LAG between DSA masters. It is not -possible to statically assign a user port towards a DSA master that has any -upper interfaces (this includes LAG devices - the master must always be the LAG +static user to CPU port assignment with LAG between DSA conduits. It is not +possible to statically assign a user port towards a DSA conduit that has any +upper interfaces (this includes LAG devices - the conduit must always be the LAG in this case). 
-Live changing of the DSA master (and thus CPU port) affinity of a user port is +Live changing of the DSA conduit (and thus CPU port) affinity of a user port is permitted, in order to allow dynamic redistribution in response to traffic. -Physical DSA masters are allowed to join and leave at any time a LAG interface -used as a DSA master; however, DSA will reject a LAG interface as a valid -candidate for being a DSA master unless it has at least one physical DSA master +Physical DSA conduits are allowed to join and leave at any time a LAG interface +used as a DSA conduit; however, DSA will reject a LAG interface as a valid +candidate for being a DSA conduit unless it has at least one physical DSA conduit as a slave device. diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index a94ddf8334..7b2e69cd7e 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -25,7 +25,7 @@ presence of a management port connected to an Ethernet controller capable of receiving Ethernet frames from the switch. This is a very common setup for all kinds of Ethernet switches found in Small Home and Office products: routers, gateways, or even top-of-rack switches. This host Ethernet controller will -be later referred to as "master" and "cpu" in DSA terminology and code. +be later referred to as "conduit" and "cpu" in DSA terminology and code. The D in DSA stands for Distributed, because the subsystem has been designed with the ability to configure and manage cascaded switches on top of each other @@ -35,7 +35,7 @@ of multiple switches connected to each other is called a "switch tree". For each front-panel port, DSA creates specialized network devices which are used as controlling and data-flowing endpoints for use by the Linux networking -stack. These specialized network interfaces are referred to as "slave" network +stack. 
These specialized network interfaces are referred to as "user" network interfaces in DSA terminology and code. The ideal case for using DSA is when an Ethernet switch supports a "switch tag" @@ -56,12 +56,16 @@ Note that DSA does not currently create network interfaces for the "cpu" and - the "cpu" port is the Ethernet switch facing side of the management controller, and as such, would create a duplication of feature, since you - would get two interfaces for the same conduit: master netdev, and "cpu" netdev + would get two interfaces for the same conduit: conduit netdev, and "cpu" netdev - the "dsa" port(s) are just conduits between two or more switches, and as such cannot really be used as proper network interfaces either, only the downstream, or the top-most upstream interface makes sense with that model +NB: for the past 15 years, the DSA subsystem had been making use of the terms +"master" (rather than "conduit") and "slave" (rather than "user"). These terms +have been removed from the DSA codebase and phased out of the uAPI. + Switch tagging protocols ------------------------ @@ -80,14 +84,14 @@ methods of the ``struct dsa_device_ops`` structure, which are detailed below. Tagging protocols generally fall in one of three categories: 1. The switch-specific frame header is located before the Ethernet header, - shifting to the right (from the perspective of the DSA master's frame + shifting to the right (from the perspective of the DSA conduit's frame parser) the MAC DA, MAC SA, EtherType and the entire L2 payload. 2. The switch-specific frame header is located before the EtherType, keeping - the MAC DA and MAC SA in place from the DSA master's perspective, but + the MAC DA and MAC SA in place from the DSA conduit's perspective, but shifting the 'real' EtherType and L2 payload to the right. 3. 
The switch-specific frame header is located at the tail of the packet, keeping all frame headers in place and not altering the view of the packet - that the DSA master's frame parser has. + that the DSA conduit's frame parser has. A tagging protocol may tag all packets with switch tags of the same length, or the tag length might vary (for example packets with PTP timestamps might @@ -95,7 +99,7 @@ require an extended switch tag, or there might be one tag length on TX and a different one on RX). Either way, the tagging protocol driver must populate the ``struct dsa_device_ops::needed_headroom`` and/or ``struct dsa_device_ops::needed_tailroom`` with the length in octets of the longest switch frame header/trailer. The DSA -framework will automatically adjust the MTU of the master interface to +framework will automatically adjust the MTU of the conduit interface to accommodate for this extra size in order for DSA user ports to support the standard MTU (L2 payload length) of 1500 octets. The ``needed_headroom`` and ``needed_tailroom`` properties are also used to request from the network stack, @@ -140,18 +144,18 @@ adding or removing the ``ETH_P_EDSA`` EtherType and some padding octets). It is possible to construct cascaded setups of DSA switches even if their tagging protocols are not compatible with one another. In this case, there are no DSA links in this fabric, and each switch constitutes a disjoint DSA switch -tree. The DSA links are viewed as simply a pair of a DSA master (the out-facing +tree. The DSA links are viewed as simply a pair of a DSA conduit (the out-facing port of the upstream DSA switch) and a CPU port (the in-facing port of the downstream DSA switch). 
The tagging protocol of the attached DSA switch tree can be viewed through the -``dsa/tagging`` sysfs attribute of the DSA master:: +``dsa/tagging`` sysfs attribute of the DSA conduit:: cat /sys/class/net/eth0/dsa/tagging If the hardware and driver are capable, the tagging protocol of the DSA switch tree can be changed at runtime. This is done by writing the new tagging -protocol name to the same sysfs device attribute as above (the DSA master and +protocol name to the same sysfs device attribute as above (the DSA conduit and all attached switch ports must be down while doing this). It is desirable that all tagging protocols are testable with the ``dsa_loop`` @@ -159,7 +163,7 @@ mockup driver, which can be attached to any network interface. The goal is that any network interface should be capable of transmitting the same packet in the same way, and the tagger should decode the same received packet in the same way regardless of the driver used for the switch control path, and the driver used -for the DSA master. +for the DSA conduit. The transmission of a packet goes through the tagger's ``xmit`` function. The passed ``struct sk_buff *skb`` has ``skb->data`` pointing at @@ -183,44 +187,44 @@ virtual DSA user network interface corresponding to the physical front-facing switch port that the packet was received on. Since tagging protocols in category 1 and 2 break software (and most often also -hardware) packet dissection on the DSA master, features such as RPS (Receive -Packet Steering) on the DSA master would be broken. The DSA framework deals +hardware) packet dissection on the DSA conduit, features such as RPS (Receive +Packet Steering) on the DSA conduit would be broken. The DSA framework deals with this by hooking into the flow dissector and shifting the offset at which -the IP header is to be found in the tagged frame as seen by the DSA master. +the IP header is to be found in the tagged frame as seen by the DSA conduit. 
This behavior is automatic based on the ``overhead`` value of the tagging protocol. If not all packets are of equal size, the tagger can implement the ``flow_dissect`` method of the ``struct dsa_device_ops`` and override this default behavior by specifying the correct offset incurred by each individual RX packet. Tail taggers do not cause issues to the flow dissector. -Checksum offload should work with category 1 and 2 taggers when the DSA master +Checksum offload should work with category 1 and 2 taggers when the DSA conduit driver declares NETIF_F_HW_CSUM in vlan_features and looks at csum_start and csum_offset. For those cases, DSA will shift the checksum start and offset by -the tag size. If the DSA master driver still uses the legacy NETIF_F_IP_CSUM +the tag size. If the DSA conduit driver still uses the legacy NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM in vlan_features, the offload might only work if the offload hardware already expects that specific tag (perhaps due to matching -vendors). DSA slaves inherit those flags from the master port, and it is up to +vendors). DSA user ports inherit those flags from the conduit, and it is up to the driver to correctly fall back to software checksum when the IP header is not where the hardware expects. If that check is ineffective, the packets might go to the network without a proper checksum (the checksum field will have the pseudo IP header sum). For category 3, when the offload hardware does not already expect the switch tag in use, the checksum must be calculated before any -tag is inserted (i.e. inside the tagger). Otherwise, the DSA master would +tag is inserted (i.e. inside the tagger). Otherwise, the DSA conduit would include the tail tag in the (software or hardware) checksum calculation. Then, when the tag gets stripped by the switch during transmission, it will leave an incorrect IP checksum in place. 
Due to various reasons (most common being category 1 taggers being associated -with DSA-unaware masters, mangling what the master perceives as MAC DA), the -tagging protocol may require the DSA master to operate in promiscuous mode, to +with DSA-unaware conduits, mangling what the conduit perceives as MAC DA), the +tagging protocol may require the DSA conduit to operate in promiscuous mode, to receive all frames regardless of the value of the MAC DA. This can be done by -setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``. -Note that this assumes a DSA-unaware master driver, which is the norm. +setting the ``promisc_on_conduit`` property of the ``struct dsa_device_ops``. +Note that this assumes a DSA-unaware conduit driver, which is the norm. -Master network devices ----------------------- +Conduit network devices +----------------------- -Master network devices are regular, unmodified Linux network device drivers for +Conduit network devices are regular, unmodified Linux network device drivers for the CPU/management Ethernet interface. Such a driver might occasionally need to know whether DSA is enabled (e.g.: to enable/disable specific offload features), but the DSA subsystem has been proven to work with industry standard drivers: @@ -232,14 +236,14 @@ Ethernet switch. Networking stack hooks ---------------------- -When a master netdev is used with DSA, a small hook is placed in the +When a conduit netdev is used with DSA, a small hook is placed in the networking stack is in order to have the DSA subsystem process the Ethernet switch specific tagging protocol. DSA accomplishes this by registering a specific (and fake) Ethernet type (later becoming ``skb->protocol``) with the networking stack, this is also known as a ``ptype`` or ``packet_type``. A typical Ethernet Frame receive sequence looks like this: -Master network device (e.g.: e1000e): +Conduit network device (e.g.: e1000e): 1. 
Receive interrupt fires: @@ -269,16 +273,16 @@ Master network device (e.g.: e1000e): - inspect and strip switch tag protocol to determine originating port - locate per-port network device - - invoke ``eth_type_trans()`` with the DSA slave network device + - invoke ``eth_type_trans()`` with the DSA user network device - invoked ``netif_receive_skb()`` -Past this point, the DSA slave network devices get delivered regular Ethernet +Past this point, the DSA user network devices get delivered regular Ethernet frames that can be processed by the networking stack. -Slave network devices ---------------------- +User network devices +-------------------- -Slave network devices created by DSA are stacked on top of their master network +User network devices created by DSA are stacked on top of their conduit network device, each of these network interfaces will be responsible for being a controlling and data-flowing end-point for each front-panel port of the switch. These interfaces are specialized in order to: @@ -289,31 +293,31 @@ These interfaces are specialized in order to: Wake-on-LAN, register dumps... - manage external/internal PHY: link, auto-negotiation, etc. -These slave network devices have custom net_device_ops and ethtool_ops function +These user network devices have custom net_device_ops and ethtool_ops function pointers which allow DSA to introduce a level of layering between the networking stack/ethtool and the switch driver implementation. -Upon frame transmission from these slave network devices, DSA will look up which +Upon frame transmission from these user network devices, DSA will look up which switch tagging protocol is currently registered with these network devices and invoke a specific transmit routine which takes care of adding the relevant switch tag in the Ethernet frames. 
-These frames are then queued for transmission using the master network device +These frames are then queued for transmission using the conduit network device ``ndo_start_xmit()`` function. Since they contain the appropriate switch tag, the Ethernet switch will be able to process these incoming frames from the management interface and deliver them to the physical switch port. When using multiple CPU ports, it is possible to stack a LAG (bonding/team) -device between the DSA slave devices and the physical DSA masters. The LAG -device is thus also a DSA master, but the LAG slave devices continue to be DSA -masters as well (just with no user port assigned to them; this is needed for -recovery in case the LAG DSA master disappears). Thus, the data path of the LAG -DSA master is used asymmetrically. On RX, the ``ETH_P_XDSA`` handler, which -calls ``dsa_switch_rcv()``, is invoked early (on the physical DSA master; -LAG slave). Therefore, the RX data path of the LAG DSA master is not used. -On the other hand, TX takes place linearly: ``dsa_slave_xmit`` calls -``dsa_enqueue_skb``, which calls ``dev_queue_xmit`` towards the LAG DSA master. -The latter calls ``dev_queue_xmit`` towards one physical DSA master or the +device between the DSA user devices and the physical DSA conduits. The LAG +device is thus also a DSA conduit, but the LAG slave devices continue to be DSA +conduits as well (just with no user port assigned to them; this is needed for +recovery in case the LAG DSA conduit disappears). Thus, the data path of the LAG +DSA conduit is used asymmetrically. On RX, the ``ETH_P_XDSA`` handler, which +calls ``dsa_switch_rcv()``, is invoked early (on the physical DSA conduit; +LAG slave). Therefore, the RX data path of the LAG DSA conduit is not used. +On the other hand, TX takes place linearly: ``dsa_user_xmit`` calls +``dsa_enqueue_skb``, which calls ``dev_queue_xmit`` towards the LAG DSA conduit. 
+The latter calls ``dev_queue_xmit`` towards one physical DSA conduit or the other, and in both cases, the packet exits the system through a hardware path towards the switch. @@ -352,11 +356,11 @@ perspective:: || swp0 | | swp1 | | swp2 | | swp3 || ++------+-+------+-+------+-+------++ -Slave MDIO bus --------------- +User MDIO bus +------------- -In order to be able to read to/from a switch PHY built into it, DSA creates a -slave MDIO bus which allows a specific switch driver to divert and intercept +In order to be able to read to/from a switch PHY built into it, DSA creates a +user MDIO bus which allows a specific switch driver to divert and intercept MDIO reads/writes towards specific PHY addresses. In most MDIO-connected switches, these functions would utilize direct or indirect PHY addressing mode to return standard MII registers from the switch builtin PHYs, allowing the PHY @@ -364,7 +368,7 @@ library and/or to return link status, link partner pages, auto-negotiation results, etc. For Ethernet switches which have both external and internal MDIO buses, the -slave MII bus can be utilized to mux/demux MDIO reads and writes towards either +user MII bus can be utilized to mux/demux MDIO reads and writes towards either internal or external MDIO devices this switch might be connected to: internal PHYs, external PHYs, or even external switches.
@@ -381,10 +385,10 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as - ``dsa_platform_data``: platform device configuration data which can reference a collection of dsa_chip_data structures if multiple switches are cascaded, - the master network device this switch tree is attached to needs to be + the conduit network device this switch tree is attached to needs to be referenced -- ``dsa_switch_tree``: structure assigned to the master network device under +- ``dsa_switch_tree``: structure assigned to the conduit network device under ``dsa_ptr``, this structure references a dsa_platform_data structure as well as the tagging protocol supported by the switch tree, and which receive/transmit function hooks should be invoked, information about the directly attached @@ -392,7 +396,7 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as referenced to address individual switches in the tree. - ``dsa_switch``: structure describing a switch device in the tree, referencing - a ``dsa_switch_tree`` as a backpointer, slave network devices, master network + a ``dsa_switch_tree`` as a backpointer, user network devices, conduit network device, and a reference to the backing``dsa_switch_ops`` - ``dsa_switch_ops``: structure referencing function pointers, see below for a @@ -404,7 +408,7 @@ Design limitations Lack of CPU/DSA network devices ------------------------------- -DSA does not currently create slave network devices for the CPU or DSA ports, as +DSA does not currently create user network devices for the CPU or DSA ports, as described before. This might be an issue in the following cases: - inability to fetch switch CPU port statistics counters using ethtool, which @@ -419,7 +423,7 @@ described before. 
This might be an issue in the following cases: Common pitfalls using DSA setups -------------------------------- -Once a master network device is configured to use DSA (dev->dsa_ptr becomes +Once a conduit network device is configured to use DSA (dev->dsa_ptr becomes non-NULL), and the switch behind it expects a tagging protocol, this network interface can only exclusively be used as a conduit interface. Sending packets directly through this interface (e.g.: opening a socket using this interface) @@ -440,7 +444,7 @@ DSA currently leverages the following subsystems: MDIO/PHY library ---------------- -Slave network devices exposed by DSA may or may not be interfacing with PHY +User network devices exposed by DSA may or may not be interfacing with PHY devices (``struct phy_device`` as defined in ``include/linux/phy.h)``, but the DSA subsystem deals with all possible combinations: @@ -450,7 +454,7 @@ subsystem deals with all possible combinations: - special, non-autonegotiated or non MDIO-managed PHY devices: SFPs, MoCA; a.k.a fixed PHYs -The PHY configuration is done by the ``dsa_slave_phy_setup()`` function and the +The PHY configuration is done by the ``dsa_user_phy_setup()`` function and the logic basically looks like this: - if Device Tree is used, the PHY device is looked up using the standard @@ -463,7 +467,7 @@ logic basically looks like this: and connected transparently using the special fixed MDIO bus driver - finally, if the PHY is built into the switch, as is very common with - standalone switch packages, the PHY is probed using the slave MII bus created + standalone switch packages, the PHY is probed using the user MII bus created by DSA @@ -472,7 +476,7 @@ SWITCHDEV DSA directly utilizes SWITCHDEV when interfacing with the bridge layer, and more specifically with its VLAN filtering portion when configuring VLANs on top -of per-port slave network devices. As of today, the only SWITCHDEV objects +of per-port user network devices. 
As of today, the only SWITCHDEV objects supported by DSA are the FDB and VLAN objects. Devlink @@ -589,8 +593,8 @@ is torn down when the first switch unregisters. It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal version of the full teardown performed by ``dsa_unregister_switch()``). -The reason is that DSA keeps a reference on the master net device, and if the -driver for the master device decides to unbind on shutdown, DSA's reference +The reason is that DSA keeps a reference on the conduit net device, and if the +driver for the conduit device decides to unbind on shutdown, DSA's reference will block that operation from finalizing. Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called, @@ -615,7 +619,7 @@ Switch configuration tag formats. - ``change_tag_protocol``: when the default tagging protocol has compatibility - problems with the master or other issues, the driver may support changing it + problems with the conduit or other issues, the driver may support changing it at runtime, either through a device tree property or through sysfs. In that case, further calls to ``get_tag_protocol`` should report the protocol in current use. @@ -643,22 +647,22 @@ Switch configuration PHY cannot be found. In this case, probing of the DSA switch continues without that particular port. -- ``port_change_master``: method through which the affinity (association used +- ``port_change_conduit``: method through which the affinity (association used for traffic termination purposes) between a user port and a CPU port can be changed. By default all user ports from a tree are assigned to the first available CPU port that makes sense for them (most of the times this means the user ports of a tree are all assigned to the same CPU port, except for H topologies as described in commit 2c0b03258b8b). 
The ``port`` argument - represents the index of the user port, and the ``master`` argument represents - the new DSA master ``net_device``. The CPU port associated with the new - master can be retrieved by looking at ``struct dsa_port *cpu_dp = - master->dsa_ptr``. Additionally, the master can also be a LAG device where - all the slave devices are physical DSA masters. LAG DSA masters also have a - valid ``master->dsa_ptr`` pointer, however this is not unique, but rather a - duplicate of the first physical DSA master's (LAG slave) ``dsa_ptr``. In case - of a LAG DSA master, a further call to ``port_lag_join`` will be emitted + represents the index of the user port, and the ``conduit`` argument represents + the new DSA conduit ``net_device``. The CPU port associated with the new + conduit can be retrieved by looking at ``struct dsa_port *cpu_dp = + conduit->dsa_ptr``. Additionally, the conduit can also be a LAG device where + all the slave devices are physical DSA conduits. LAG DSA conduits also have a + valid ``conduit->dsa_ptr`` pointer, however this is not unique, but rather a + duplicate of the first physical DSA conduit's (LAG slave) ``dsa_ptr``. In case + of a LAG DSA conduit, a further call to ``port_lag_join`` will be emitted separately for the physical CPU ports associated with the physical DSA - masters, requesting them to create a hardware LAG associated with the LAG + conduits, requesting them to create a hardware LAG associated with the LAG interface. PHY devices and link management @@ -670,16 +674,16 @@ PHY devices and link management should return a 32-bit bitmask of "flags" that is private between the switch driver and the Ethernet PHY driver in ``drivers/net/phy/\*``. -- ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read +- ``phy_read``: Function invoked by the DSA user MDIO bus when attempting to read the switch port MDIO registers. If unavailable, return 0xffff for each read.
For builtin switch Ethernet PHYs, this function should allow reading the link status, auto-negotiation results, link partner pages, etc. -- ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write +- ``phy_write``: Function invoked by the DSA user MDIO bus when attempting to write to the switch port MDIO registers. If unavailable return a negative error code. -- ``adjust_link``: Function invoked by the PHY library when a slave network device +- ``adjust_link``: Function invoked by the PHY library when a user network device is attached to a PHY device. This function is responsible for appropriately configuring the switch port link parameters: speed, duplex, pause based on what the ``phy_device`` is providing. @@ -698,14 +702,14 @@ Ethtool operations typically return statistics strings, private flags strings, etc. - ``get_ethtool_stats``: ethtool function used to query per-port statistics and - return their values. DSA overlays slave network devices general statistics: + return their values. 
DSA overlays user network devices general statistics: RX/TX counters from the network device, with switch driver specific statistics per port - ``get_sset_count``: ethtool function used to query the number of statistics items - ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this - function may for certain implementations also query the master network device + function may for certain implementations also query the conduit network device Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN - ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port, @@ -747,13 +751,13 @@ Power management should resume all Ethernet switch activities and re-configure the switch to be in a fully active state -- ``port_enable``: function invoked by the DSA slave network device ndo_open +- ``port_enable``: function invoked by the DSA user network device ndo_open function when a port is administratively brought up, this function should fully enable a given switch port. DSA takes care of marking the port with ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it was not, and propagating these changes down to the hardware -- ``port_disable``: function invoked by the DSA slave network device ndo_close +- ``port_disable``: function invoked by the DSA user network device ndo_close function when a port is administratively brought down, this function should fully disable a given switch port. DSA takes care of marking the port with ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is diff --git a/Documentation/networking/dsa/lan9303.rst b/Documentation/networking/dsa/lan9303.rst index e3c820db28..ab81b4e013 100644 --- a/Documentation/networking/dsa/lan9303.rst +++ b/Documentation/networking/dsa/lan9303.rst @@ -4,7 +4,7 @@ LAN9303 Ethernet switch driver The LAN9303 is a three port 10/100 Mbps ethernet switch with integrated phys for the two external ethernet ports. 
The third port is an RMII/MII interface to a -host master network interface (e.g. fixed link). +host conduit network interface (e.g. fixed link). Driver details diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index e0219c1452..8ab60eef07 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -79,7 +79,7 @@ The hardware tags all traffic internally with a port-based VLAN (pvid), or it decodes the VLAN information from the 802.1Q tag. Advanced VLAN classification is not possible. Once attributed a VLAN tag, frames are checked against the port's membership rules and dropped at ingress if they don't match any VLAN. -This behavior is available when switch ports are enslaved to a bridge with +This behavior is available when switch ports join a bridge with ``vlan_filtering 1``. Normally the hardware is not configurable with respect to VLAN awareness, but @@ -122,7 +122,7 @@ on egress. Using ``vlan_filtering=1``, the behavior is the other way around: offloaded flows can be steered to TX queues based on the VLAN PCP, but the DSA net devices are no longer able to do that. To inject frames into a hardware TX queue with VLAN awareness active, it is necessary to create a VLAN -sub-interface on the DSA master port, and send normal (0x8100) VLAN-tagged +sub-interface on the DSA conduit port, and send normal (0x8100) VLAN-tagged towards the switch, with the VLAN PCP bits set appropriately. Management traffic (having DMAC 01-80-C2-xx-xx-xx or 01-19-1B-xx-xx-xx) is the @@ -389,7 +389,7 @@ MDIO bus and PHY management The SJA1105 does not have an MDIO bus and does not perform in-band AN either. Therefore there is no link state notification coming from the switch device. A board would need to hook up the PHYs connected to the switch to any other -MDIO bus available to Linux within the system (e.g. to the DSA master's MDIO +MDIO bus available to Linux within the system (e.g. 
to the DSA conduit's MDIO bus). Link state management then works by the driver manually keeping in sync (over SPI commands) the MAC link speed with the settings negotiated by the PHY. diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index f69da50748..7d8c538049 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF -instruction set. +sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation +from eBPF instruction set. Testing ------- diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5b75c3f7a1..683eb42309 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -59,7 +59,6 @@ Contents: gtp ila ioam6-sysctl - ipddp ip_dynaddr ipsec ip-sysctl @@ -107,6 +106,7 @@ Contents: sysfs-tagging tc-actions-env-rules tc-queue-filters + tcp_ao tcp-thin team timestamping diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a66054d076..7afff42612 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER Default : 44 +tcp_backlog_ack_defer - BOOLEAN + If set, user thread processing socket backlog tries sending + one ACK for the whole queue. This helps to avoid potential + long latencies at end of a TCP socket syscall. + + Default : true + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. 
An idle period is defined at @@ -1176,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER Default: 128 +tcp_pingpong_thresh - INTEGER + The number of estimated data replies sent for estimated incoming data + requests that must happen before TCP considers that a connection is a + "ping-pong" (request-response) connection for which delayed + acknowledgments can provide benefits. + + This threshold is 1 by default, but some applications may need a higher + threshold for optimal performance. + + Possible Values: 1 - 255 + + Default: 1 + UDP variables ============= @@ -2304,6 +2324,17 @@ accept_ra_pinfo - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_honor_pio_life - BOOLEAN + Whether to use RFC4862 Section 5.5.3e to determine the valid + lifetime of an address matching a prefix sent in a Router + Advertisement Prefix Information Option. + + - If enabled, the PIO valid lifetime will always be honored. + - If disabled, RFC4862 section 5.5.3e is used to determine + the valid lifetime of the address. + + Default: 0 (disabled) + accept_ra_rt_info_min_plen - INTEGER Minimum prefix length of Route Information in RA. @@ -2471,12 +2502,18 @@ use_tempaddr - INTEGER * -1 (for point-to-point devices and loopback devices) temp_valid_lft - INTEGER - valid lifetime (in seconds) for temporary addresses. + valid lifetime (in seconds) for temporary addresses. If less than the + minimum required lifetime (typically 5 seconds), temporary addresses + will not be created. Default: 172800 (2 days) temp_prefered_lft - INTEGER - Preferred lifetime (in seconds) for temporary addresses. + Preferred lifetime (in seconds) for temporary addresses. If + temp_prefered_lft is less than the minimum required lifetime (typically + 5 seconds), temporary addresses will not be created. If + temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime + is temp_valid_lft. 
Default: 86400 (1 day) diff --git a/Documentation/networking/ipddp.rst b/Documentation/networking/ipddp.rst deleted file mode 100644 index be7091b779..0000000000 --- a/Documentation/networking/ipddp.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -========================================================= -AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation -========================================================= - -Documentation ipddp.c - -This file is written by Jay Schulist - -Introduction ------------- - -AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk -networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams -inside AppleTalk packets. - -Through this driver you can either allow your Linux box to communicate -IP over an AppleTalk network or you can provide IP gatewaying functions -for your AppleTalk users. - -You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk, -EtherTalk and PPPTalk. The only limit on the protocol is that of what -kernel AppleTalk layer and drivers are available. - -Each mode requires its own user space software. - -Compiling AppleTalk-IP Decapsulation/Encapsulation -================================================== - -AppleTalk-IP decapsulation needs to be compiled into your kernel. You -will need to turn on AppleTalk-IP driver support. Then you will need to -select ONE of the two options; IP to AppleTalk-IP encapsulation support or -AppleTalk-IP to IP decapsulation support. If you compile the driver -statically you will only be able to use the driver for the function you have -enabled in the kernel. If you compile the driver as a module you can -select what mode you want it to run in via a module loading param. -ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for -AppleTalk-IP to IP decapsulation. 
- -Basic instructions for user space tools -======================================= - -I will briefly describe the operation of the tools, but you will -need to consult the supporting documentation for each set of tools. - -Decapsulation - You will need to download a software package called -MacGate. In this distribution there will be a tool called MacRoute -which enables you to add routes to the kernel for your Macs by hand. -Also the tool MacRegGateWay is included to register the -proper IP Gateway and IP addresses for your machine. Included in this -distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from -ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional -but it allows automatic adding and deleting of routes for Macs. (Handy -for locations with large Mac installations) - -Encapsulation - You will need to download a software daemon called ipddpd. -This software expects there to be an AppleTalk-IP gateway on the network. -You will also need to add the proper routes to route your Linux box's IP -traffic out the ipddp interface. - -Common Uses of ipddp.c ----------------------- -Of course AppleTalk-IP decapsulation and encapsulation, but specifically -decapsulation is being used most for connecting LocalTalk networks to -IP networks. Although it has been used on EtherTalk networks to allow -Macs that are only able to tunnel IP over EtherTalk. - -Encapsulation has been used to allow a Linux box stuck on a LocalTalk -network to use IP. It should work equally well if you are stuck on an -EtherTalk only network. - -Further Assistance -------------------- -You can contact me (Jay Schulist ) with any -questions regarding decapsulation or encapsulation. Bradford W. Johnson - originally wrote the ipddp.c driver for IP -encapsulation in AppleTalk. 
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 15f1919d64..69975ce25a 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -25,6 +25,17 @@ add_addr_timeout - INTEGER (seconds) Default: 120 +close_timeout - INTEGER (seconds) + Set the make-after-break timeout: in absence of any close or + shutdown syscall, MPTCP sockets will maintain the status + unchanged for such time, after the last subflow removal, before + moving to TCP_CLOSE. + + The default value matches TCP_TIMEWAIT_LEN. This is a per-namespace + sysctl. + + Default: 60 + checksum_enabled - BOOLEAN Control whether DSS checksum can be enabled. diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index b3ea96af9b..78fb70e748 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -7,7 +7,8 @@ Intro ===== The MSG_ZEROCOPY flag enables copy avoidance for socket send calls. -The feature is currently implemented for TCP and UDP sockets. +The feature is currently implemented for TCP, UDP and VSOCK (with +virtio transport) sockets. Opportunity and Caveats @@ -174,7 +175,9 @@ read_notification() call in the previous snippet. A notification is encoded in the standard error format, sock_extended_err. The level and type fields in the control data are protocol family -specific, IP_RECVERR or IPV6_RECVERR. +specific, IP_RECVERR or IPV6_RECVERR (for TCP or UDP socket). +For VSOCK socket, cmsg_level will be SOL_VSOCK and cmsg_type will be +VSOCK_RECVERR. Error origin is the new type SO_EE_ORIGIN_ZEROCOPY. ee_errno is zero, as explained before, to avoid blocking read and write system calls on @@ -235,12 +238,15 @@ Implementation Loopback -------- +For TCP and UDP: Data sent to local sockets can be queued indefinitely if the receive process does not read its socket. Unbound notification latency is not acceptable. 
For this reason all packets generated with MSG_ZEROCOPY that are looped to a local socket will incur a deferred copy. This includes looping onto packet sockets (e.g., tcpdump) and tun devices. +For VSOCK: +Data path sent to local sockets is the same as for non-local sockets. Testing ======= @@ -254,3 +260,6 @@ instance when run with msg_zerocopy.sh between a veth pair across namespaces, the test will not show any improvement. For testing, the loopback restriction can be temporarily relaxed by making skb_orphan_frags_rx identical to skb_orphan_frags. + +For VSOCK type of socket example can be found in +tools/testing/vsock/vsock_test_zerocopy.c. diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 7a9de0568e..390730a743 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -99,9 +99,6 @@ Dynamic reconfiguration: Dynamic reconfigurability is a useful addition to netconsole that enables remote logging targets to be dynamically added, removed, or have their parameters reconfigured at runtime from a configfs-based userspace interface. -[ Note that the parameters of netconsole targets that were specified/created -from the boot/module option are not exposed via this interface, and hence -cannot be modified dynamically. ] To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the netconsole module (or kernel, if netconsole is built-in). @@ -155,6 +152,25 @@ You can also update the local interface dynamically. This is especially useful if you want to use interfaces that have newly come up (and may not have existed when netconsole was loaded / initialized). +Netconsole targets defined at boot time (or module load time) with the +`netconsole=` param are assigned the name `cmdline<index>`. For example, the +first target in the parameter is named `cmdline0`. You can control and modify +these targets by creating configfs directories with the matching name.
+ +Let's suppose you have two netconsole targets defined at boot time:: + + netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc;4444@10.0.0.1/eth1,9353@10.0.0.3/12:34:56:78:9a:bc + +You can modify these targets in runtime by creating the following targets:: + + mkdir cmdline0 + cat cmdline0/remote_ip + 10.0.0.2 + + mkdir cmdline1 + cat cmdline1/remote_ip + 10.0.0.3 + Extended console: ================= diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 215ebc9275..60993cb56b 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -58,7 +58,9 @@ a page will cause no race conditions is enough. .. kernel-doc:: include/net/page_pool/helpers.h :identifiers: page_pool_put_page page_pool_put_full_page - page_pool_recycle_direct page_pool_dev_alloc_pages + page_pool_recycle_direct page_pool_free_va + page_pool_dev_alloc_pages page_pool_dev_alloc_frag + page_pool_dev_alloc page_pool_dev_alloc_va page_pool_get_dma_addr page_pool_get_dma_dir .. kernel-doc:: net/core/page_pool.c diff --git a/Documentation/networking/pktgen.rst b/Documentation/networking/pktgen.rst index 1225f0f63f..c945218946 100644 --- a/Documentation/networking/pktgen.rst +++ b/Documentation/networking/pktgen.rst @@ -178,6 +178,7 @@ Examples:: IPSEC # IPsec encapsulation (needs CONFIG_XFRM) NODE_ALLOC # node specific memory allocation NO_TIMESTAMP # disable timestamping + SHARED # enable shared SKB pgset 'flag ![name]' Clear a flag to determine behaviour. Note that you might need to use single quote in interactive mode, so that your shell wouldn't expand @@ -288,6 +289,16 @@ To avoid breaking existing testbed scripts for using AH type and tunnel mode, you can use "pgset spi SPI_VALUE" to specify which transformation mode to employ. +Disable shared SKB +================== +By default, SKBs sent by pktgen are shared (user count > 1). 
+To test with non-shared SKBs, remove the "SHARED" flag by simply setting:: + + pg_set "flag !SHARED" + +However, if the "clone_skb" or "burst" parameters are configured, the skb +still needs to be held by pktgen for further access. Hence the skb must be +shared. Current commands and configuration options ========================================== @@ -357,6 +368,7 @@ Current commands and configuration options IPSEC NODE_ALLOC NO_TIMESTAMP + SHARED spi (ipsec) diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst index 92c9fb46d6..03ae19a689 100644 --- a/Documentation/networking/scaling.rst +++ b/Documentation/networking/scaling.rst @@ -105,6 +105,48 @@ a separate CPU. For interrupt handling, HT has shown no benefit in initial tests, so limit the number of queues to the number of CPU cores in the system. +Dedicated RSS contexts +~~~~~~~~~~~~~~~~~~~~~~ + +Modern NICs support creating multiple co-existing RSS configurations +which are selected based on explicit matching rules. This can be very +useful when application wants to constrain the set of queues receiving +traffic for e.g. a particular destination port or IP address. +The example below shows how to direct all traffic to TCP port 22 +to queues 0 and 1. + +To create an additional RSS context use:: + + # ethtool -X eth0 hfunc toeplitz context new + New RSS context is 1 + +Kernel reports back the ID of the allocated context (the default, always +present RSS context has ID of 0). The new context can be queried and +modified using the same APIs as the default context:: + + # ethtool -x eth0 context 1 + RX flow hash indirection table for eth0 with 13 RX ring(s): + 0: 0 1 2 3 4 5 6 7 + 8: 8 9 10 11 12 0 1 2 + [...] + # ethtool -X eth0 equal 2 context 1 + # ethtool -x eth0 context 1 + RX flow hash indirection table for eth0 with 13 RX ring(s): + 0: 0 1 0 1 0 1 0 1 + 8: 0 1 0 1 0 1 0 1 + [...] 
+ +To make use of the new context direct traffic to it using an n-tuple +filter:: + + # ethtool -N eth0 flow-type tcp6 dst-port 22 context 1 + Added rule with ID 1023 + +When done, remove the context and the rule:: + + # ethtool -N eth0 delete 1023 + # ethtool -X eth0 context 1 delete + RPS: Receive Packet Steering ============================ diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index 55b65f607a..8054d33f44 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -200,10 +200,12 @@ this documentation. when the in-band link state changes - otherwise the link will never come up. - The :c:func:`validate` method should mask the supplied supported mask, - and ``state->advertising`` with the supported ethtool link modes. - These are the new ethtool link modes, so bitmask operations must be - used. For an example, see ``drivers/net/ethernet/marvell/mvneta.c``. + The :c:func:`mac_get_caps` method is optional, and if provided should + return the phylink MAC capabilities that are supported for the passed + ``interface`` mode. In general, there is no need to implement this method. + Phylink will use these capabilities in combination with permissible + capabilities for ``interface`` to determine the allowable ethtool link + modes. The :c:func:`mac_link_state` method is used to read the link state from the MAC, and report back the settings that the MAC is currently diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 6d8acdbe9b..769149d987 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -44,18 +44,16 @@ smcr_testlink_time - INTEGER wmem - INTEGER Initial size of send buffer used by SMC sockets. - The default value inherits from net.ipv4.tcp_wmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D. 
- Default: 16K + Default: 64KiB rmem - INTEGER Initial size of receive buffer (RMB) used by SMC sockets. - The default value inherits from net.ipv4.tcp_rmem[1]. The minimum value is 16KiB and there is no hard limit for max value, but only allowed 512KiB for SMC-R and 1MiB for SMC-D. - Default: 128K + Default: 64KiB diff --git a/Documentation/networking/tcp_ao.rst b/Documentation/networking/tcp_ao.rst new file mode 100644 index 0000000000..8a58321acc --- /dev/null +++ b/Documentation/networking/tcp_ao.rst @@ -0,0 +1,444 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================================== +TCP Authentication Option Linux implementation (RFC5925) +======================================================== + +TCP Authentication Option (TCP-AO) provides a TCP extension aimed at verifying +segments between trusted peers. It adds a new TCP header option with +a Message Authentication Code (MAC). MACs are produced from the content +of a TCP segment using a hashing function with a password known to both peers. +The intent of TCP-AO is to deprecate TCP-MD5 providing better security, +key rotation and support for variety of hashing algorithms. + +1. Introduction +=============== + +.. table:: Short and Limited Comparison of TCP-AO and TCP-MD5 + + +----------------------+------------------------+-----------------------+ + | | TCP-MD5 | TCP-AO | + +======================+========================+=======================+ + |Supported hashing |MD5 |Must support HMAC-SHA1 | + |algorithms |(cryptographically weak)|(chosen-prefix attacks)| + | | |and CMAC-AES-128 (only | + | | |side-channel attacks). | + | | |May support any hashing| + | | |algorithm. | + +----------------------+------------------------+-----------------------+ + |Length of MACs (bytes)|16 |Typically 12-16. | + | | |Other variants that fit| + | | |TCP header permitted. 
| + +----------------------+------------------------+-----------------------+ + |Number of keys per |1 |Many | + |TCP connection | | | + +----------------------+------------------------+-----------------------+ + |Possibility to change |Non-practical (both |Supported by protocol | + |an active key |peers have to change | | + | |them during MSL) | | + +----------------------+------------------------+-----------------------+ + |Protection against |No |Yes: ignoring them | + |ICMP 'hard errors' | |by default on | + | | |established connections| + +----------------------+------------------------+-----------------------+ + |Protection against |No |Yes: pseudo-header | + |traffic-crossing | |includes TCP ports. | + |attack | | | + +----------------------+------------------------+-----------------------+ + |Protection against |No |Sequence Number | + |replayed TCP segments | |Extension (SNE) and | + | | |Initial Sequence | + | | |Numbers (ISNs) | + +----------------------+------------------------+-----------------------+ + |Supports |Yes |No. ISNs+SNE are needed| + |Connectionless Resets | |to correctly sign RST. | + +----------------------+------------------------+-----------------------+ + |Standards |RFC 2385 |RFC 5925, RFC 5926 | + +----------------------+------------------------+-----------------------+ + + +1.1 Frequently Asked Questions (FAQ) with references to RFC 5925 +---------------------------------------------------------------- + +Q: Can either SendID or RecvID be non-unique for the same 4-tuple +(srcaddr, srcport, dstaddr, dstport)? + +A: No [3.1]:: + + >> The IDs of MKTs MUST NOT overlap where their TCP connection + identifiers overlap. + +Q: Can Master Key Tuple (MKT) for an active connection be removed? 
+ +A: No, unless it's copied to Transport Control Block (TCB) [3.1]:: + + It is presumed that an MKT affecting a particular connection cannot + be destroyed during an active connection -- or, equivalently, that + its parameters are copied to an area local to the connection (i.e., + instantiated) and so changes would affect only new connections. + +Q: If an old MKT needs to be deleted, how should it be done in order +to not remove it for an active connection? (As it can be still in use +at any moment later) + +A: Not specified by RFC 5925, seems to be a problem for key management +to ensure that no one uses such MKT before trying to remove it. + +Q: Can an old MKT exist forever and be used by another peer? + +A: It can, it's a key management task to decide when to remove an old key [6.1]:: + + Deciding when to start using a key is a performance issue. Deciding + when to remove an MKT is a security issue. Invalid MKTs are expected + to be removed. TCP-AO provides no mechanism to coordinate their removal, + as we consider this a key management operation. + +also [6.1]:: + + The only way to avoid reuse of previously used MKTs is to remove the MKT + when it is no longer considered permitted. + +Linux TCP-AO will try its best to prevent you from removing a key that's +being used, considering it a key management failure. But since keeping +an outdated key may become a security issue and as a peer may +unintentionally prevent the removal of an old key by always setting +it as RNextKeyID - a forced key removal mechanism is provided, where +userspace has to supply KeyID to use instead of the one that's being removed +and the kernel will atomically delete the old key, even if the peer is +still requesting it. There are no guarantees for force-delete as the peer +may yet not have the new key - the TCP connection may just break. +Alternatively, one may choose to shut down the socket. + +Q: What happens when a packet is received on a new connection with no known +MKT's RecvID? 
+ +A: RFC 5925 specifies that by default it is accepted with a warning logged, but +the behaviour can be configured by the user [7.5.1.a]:: + + If the segment is a SYN, then this is the first segment of a new + connection. Find the matching MKT for this segment, using the segment's + socket pair and its TCP-AO KeyID, matched against the MKT's TCP connection + identifier and the MKT's RecvID. + + i. If there is no matching MKT, remove TCP-AO from the segment. + Proceed with further TCP handling of the segment. + NOTE: this presumes that connections that do not match any MKT + should be silently accepted, as noted in Section 7.3. + +[7.3]:: + + >> A TCP-AO implementation MUST allow for configuration of the behavior + of segments with TCP-AO but that do not match an MKT. The initial default + of this configuration SHOULD be to silently accept such connections. + If this is not the desired case, an MKT can be included to match such + connections, or the connection can indicate that TCP-AO is required. + Alternately, the configuration can be changed to discard segments with + the AO option not matching an MKT. + +[10.2.b]:: + + Connections not matching any MKT do not require TCP-AO. Further, incoming + segments with TCP-AO are not discarded solely because they include + the option, provided they do not match any MKT. + +Note that Linux TCP-AO implementation differs in this aspect. Currently, TCP-AO +segments with unknown key signatures are discarded with warnings logged. + +Q: Does the RFC imply centralized kernel key management in any way? +(i.e. that a key on all connections MUST be rotated at the same time?) + +A: Not specified. MKTs can be managed in userspace, the only relevant part to +key changes is [7.3]:: + + >> All TCP segments MUST be checked against the set of MKTs for matching + TCP connection identifiers. + +Q: What happens when RNextKeyID requested by a peer is unknown? Should +the connection be reset? 
+ +A: It should not, no action needs to be performed [7.5.2.e]:: + + ii. If they differ, determine whether the RNextKeyID MKT is ready. + + 1. If the MKT corresponding to the segment’s socket pair and RNextKeyID + is not available, no action is required (RNextKeyID of a received + segment needs to match the MKT’s SendID). + +Q: How is current_key set and when does it change? Is it a user-triggered +change, or is it by a request from the remote peer? Is it set by the user +explicitly, or by a matching rule? + +A: current_key is set by RNextKeyID [6.1]:: + + Rnext_key is changed only by manual user intervention or MKT management + protocol operation. It is not manipulated by TCP-AO. Current_key is updated + by TCP-AO when processing received TCP segments as discussed in the segment + processing description in Section 7.5. Note that the algorithm allows + the current_key to change to a new MKT, then change back to a previously + used MKT (known as "backing up"). This can occur during an MKT change when + segments are received out of order, and is considered a feature of TCP-AO, + because reordering does not result in drops. + +[7.5.2.e.ii]:: + + 2. If the matching MKT corresponding to the segment’s socket pair and + RNextKeyID is available: + + a. Set current_key to the RNextKeyID MKT. + +Q: If both peers have multiple MKTs matching the connection's socket pair +(with different KeyIDs), how should the sender/receiver pick KeyID to use? + +A: Some mechanism should pick the "desired" MKT [3.3]:: + + Multiple MKTs may match a single outgoing segment, e.g., when MKTs + are being changed. Those MKTs cannot have conflicting IDs (as noted + elsewhere), and some mechanism must determine which MKT to use for each + given outgoing segment. + + >> An outgoing TCP segment MUST match at most one desired MKT, indicated + by the segment’s socket pair. The segment MAY match multiple MKTs, provided + that exactly one MKT is indicated as desired.
Other information in + the segment MAY be used to determine the desired MKT when multiple MKTs + match; such information MUST NOT include values in any TCP option fields. + +Q: Can a TCP-MD5 connection migrate to TCP-AO (and vice-versa)? + +A: No [1]:: + + TCP MD5-protected connections cannot be migrated to TCP-AO because TCP MD5 + does not support any changes to a connection’s security algorithm + once established. + +Q: If all MKTs are removed on a connection, can it become a non-TCP-AO signed +connection? + +A: [7.5.2] doesn't have the same choice as SYN packet handling in [7.5.1.i] +that would allow accepting segments without a signature (which would be insecure). +While switching to a non-TCP-AO connection is not prohibited directly, it seems +to be what the RFC means. Also, there's a requirement for TCP-AO connections to +always have one current_key [3.3]:: + + TCP-AO requires that every protected TCP segment match exactly one MKT. + +[3.3]:: + + >> An incoming TCP segment including TCP-AO MUST match exactly one MKT, + indicated solely by the segment’s socket pair and its TCP-AO KeyID. + +[4.4]:: + + One or more MKTs. These are the MKTs that match this connection’s + socket pair. + +Q: Can a non-TCP-AO connection become a TCP-AO-enabled one? + +A: No: for an already established non-TCP-AO connection it would be impossible +to switch to using TCP-AO as the traffic key generation requires the initial +sequence numbers. Paraphrasing, starting using TCP-AO would require +re-establishing the TCP connection. + +2. In-kernel MKTs database vs database in userspace +=================================================== + +Linux TCP-AO support is implemented using ``setsockopt()s``, in a similar way +to TCP-MD5. It means that a userspace application that wants to use TCP-AO +should perform ``setsockopt()`` on a TCP socket when it wants to add, +remove or rotate MKTs. This approach moves the key management responsibility +to userspace as well as decisions on corner cases, i.e.
what to do if +the peer doesn't respect RNextKeyID; moving more code to userspace, especially +responsible for the policy decisions. Besides, it's flexible and scales well +(with less locking needed than in the case of an in-kernel database). One also +should keep in mind that mainly intended users are BGP processes, not any +random applications, which means that compared to IPsec tunnels, +no transparency is really needed and modern BGP daemons already have +``setsockopt()s`` for TCP-MD5 support. + +.. table:: Considered pros and cons of the approaches + + +----------------------+------------------------+-----------------------+ + | | ``setsockopt()`` | in-kernel DB | + +======================+========================+=======================+ + | Extendability | ``setsockopt()`` | Netlink messages are | + | | commands should be | simple and extendable | + | | extendable syscalls | | + +----------------------+------------------------+-----------------------+ + | Required userspace | BGP or any application | could be transparent | + | changes | that wants TCP-AO needs| as tunnels, providing | + | | to perform | something like | + | | ``setsockopt()s`` | ``ip tcpao add key`` | + | | and do key management | (delete/show/rotate) | + +----------------------+------------------------+-----------------------+ + |MKTs removal or adding| harder for userspace | harder for kernel | + +----------------------+------------------------+-----------------------+ + | Dump-ability | ``getsockopt()`` | Netlink .dump() | + | | | callback | + +----------------------+------------------------+-----------------------+ + | Limits on kernel | equal | + | resources/memory | | + +----------------------+------------------------+-----------------------+ + | Scalability | contention on | contention on | + | | ``TCP_LISTEN`` sockets | the whole database | + +----------------------+------------------------+-----------------------+ + | Monitoring & warnings| ``TCP_DIAG`` | same Netlink socket | + 
+----------------------+------------------------+-----------------------+ + | Matching of MKTs | half-problem: only | hard | + | | listen sockets | | + +----------------------+------------------------+-----------------------+ + + +3. uAPI +======= + +Linux provides a set of ``setsockopt()s`` and ``getsockopt()s`` that let +userspace manage TCP-AO on a per-socket basis. In order to add/delete MKTs +``TCP_AO_ADD_KEY`` and ``TCP_AO_DEL_KEY`` TCP socket options must be used. +It is not allowed to add a key on an established non-TCP-AO connection +as well as to remove the last key from a TCP-AO connection. + +``setsockopt(TCP_AO_DEL_KEY)`` command may specify ``tcp_ao_del::current_key`` ++ ``tcp_ao_del::set_current`` and/or ``tcp_ao_del::rnext`` ++ ``tcp_ao_del::set_rnext`` which makes such delete "forced": it +provides userspace a way to delete a key that's being used and atomically set +another one instead. This is not intended for normal use and should be used +only when the peer ignores RNextKeyID and keeps requesting/using an old key. +It provides a way to force-delete a key that's not trusted but may break +the TCP-AO connection. + +The usual/normal key-rotation can be performed with ``setsockopt(TCP_AO_INFO)``. +It also provides a uAPI to change per-socket TCP-AO settings, such as +ignoring ICMPs, as well as clear per-socket TCP-AO packet counters. +The corresponding ``getsockopt(TCP_AO_INFO)`` can be used to get those +per-socket TCP-AO settings. + +Another useful command is ``getsockopt(TCP_AO_GET_KEYS)``. One can use it +to list all MKTs on a TCP socket or use a filter to get keys for a specific +peer and/or sndid/rcvid, VRF L3 interface or get current_key/rnext_key. + +To repair TCP-AO connections ``setsockopt(TCP_AO_REPAIR)`` is available, +provided that the user previously has checkpointed/dumped the socket with +``getsockopt(TCP_AO_REPAIR)``.
+ +A tip here for scaled TCP_LISTEN sockets, that may have some thousands of TCP-AO +keys, is: use filters in ``getsockopt(TCP_AO_GET_KEYS)`` and asynchronous +delete with ``setsockopt(TCP_AO_DEL_KEY)``. + +Linux TCP-AO also provides a bunch of segment counters that can be helpful +with troubleshooting/debugging issues. Every MKT has good/bad counters +that reflect how many packets passed/failed verification. +Each TCP-AO socket has the following counters: +- for good segments (properly signed) +- for bad segments (failed TCP-AO verification) +- for segments with unknown keys +- for segments where an AO signature was expected, but wasn't found +- for the number of ignored ICMPs + +TCP-AO per-socket counters are also duplicated with per-netns counters, +exposed with SNMP. Those are ``TCPAOGood``, ``TCPAOBad``, ``TCPAOKeyNotFound``, +``TCPAORequired`` and ``TCPAODroppedIcmps``. + +RFC 5925 very permissively specifies how TCP port matching can be done for +MKTs:: + + TCP connection identifier. A TCP socket pair, i.e., a local IP + address, a remote IP address, a TCP local port, and a TCP remote port. + Values can be partially specified using ranges (e.g., 2-30), masks + (e.g., 0xF0), wildcards (e.g., "*"), or any other suitable indication. + +Currently Linux TCP-AO implementation doesn't provide any TCP port matching. +Probably, port ranges are the most flexible for uAPI, but so far +not implemented. + +4. ``setsockopt()`` vs ``accept()`` race +======================================== + +In contrast with TCP-MD5 established connection which has just one key, +TCP-AO connections may have many keys, which means that accepted connections +on a listen socket may have any number of keys as well. As copying all those +keys on a first properly signed SYN would make the request socket bigger, that +would be undesirable. Currently, the implementation doesn't copy keys +to request sockets, but rather looks them up on the "parent" listener socket.
+ +The result is that when userspace removes TCP-AO keys, that may break +not-yet-established connections on request sockets as well as not removing +keys from sockets that were already established, but not yet ``accept()``'ed, +hanging in the accept queue. + +The reverse is valid as well: if userspace adds a new key for a peer on +a listener socket, the established sockets in accept queue won't +have the new keys. + +At this moment, the resolution for the two races: +``setsockopt(TCP_AO_ADD_KEY)`` vs ``accept()`` +and ``setsockopt(TCP_AO_DEL_KEY)`` vs ``accept()`` is delegated to userspace. +This means that it's expected that userspace would check the MKTs on the socket +that was returned by ``accept()`` to verify that any key rotation that +happened on listen socket is reflected on the newly established connection. + +This is a similar "do-nothing" approach to TCP-MD5 from the kernel side and +may be changed later by introducing new flags to ``tcp_ao_add`` +and ``tcp_ao_del``. + +Note that this race is rare for it needs TCP-AO key rotation to happen +during the 3-way handshake for the new TCP connection. + +5. Interaction with TCP-MD5 +=========================== + +A TCP connection can not migrate between TCP-AO and TCP-MD5 options. The +established sockets that have either AO or MD5 keys are restricted for +adding keys of the other option. + +For listening sockets the picture is different: BGP server may want to receive +both TCP-AO and (deprecated) TCP-MD5 clients. As a result, both types of keys +may be added to TCP_CLOSED or TCP_LISTEN sockets. It's not allowed to add +different types of keys for the same peer. + +6. SNE Linux implementation +=========================== + +RFC 5925 [6.2] describes the algorithm of how to extend TCP sequence numbers +with SNE. In short: TCP has to track the previous sequence numbers and set +sne_flag when the current SEQ number rolls over. 
The flag is cleared when +both current and previous SEQ numbers cross 0x7fff, which is 32Kb. + +In times when sne_flag is set, the algorithm compares SEQ for each packet with +0x7fff and if it's higher than 32Kb, it assumes that the packet should be +verified with SNE before the increment. As a result, there's +this [0; 32Kb] window, when packets with (SNE - 1) can be accepted. + +Linux implementation simplifies this a bit: as the network stack already tracks +the first SEQ byte that ACK is wanted for (snd_una) and the next SEQ byte that +is wanted (rcv_nxt) - that's enough information for a rough estimation +on where in the 4GB SEQ number space both sender and receiver are. +When they roll over to zero, the corresponding SNE gets incremented. + +tcp_ao_compute_sne() is called for each TCP-AO segment. It compares SEQ numbers +from the segment with snd_una or rcv_nxt and fits the result into a 2GB window around them, +detecting SEQ numbers rolling over. That simplifies the code a lot and only +requires SNE numbers to be stored on every TCP-AO socket. + +The 2GB window at first glance seems much more permissive compared to +RFC 5925. But that is only used to pick the correct SNE before/after +a rollover. It allows more TCP segment replays, but yet all regular +TCP checks in tcp_sequence() are applied on the verified segment. +So, it trades a bit more permissive acceptance of replayed/retransmitted +segments for the simplicity of the algorithm and what seems better behaviour +for large TCP windows. + +7.
Links +======== + +RFC 5925 The TCP Authentication Option + https://www.rfc-editor.org/rfc/pdfrfc/rfc5925.txt.pdf + +RFC 5926 Cryptographic Algorithms for the TCP Authentication Option (TCP-AO) + https://www.rfc-editor.org/rfc/pdfrfc/rfc5926.txt.pdf + +Draft "SHA-2 Algorithm for the TCP Authentication Option (TCP-AO)" + https://datatracker.ietf.org/doc/html/draft-nayak-tcp-sha2-03 + +RFC 2385 Protection of BGP Sessions via the TCP MD5 Signature Option + https://www.rfc-editor.org/rfc/pdfrfc/rfc2385.txt.pdf + +:Author: Dmitry Safonov diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index 25ce72af81..205696780b 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -105,6 +105,13 @@ bpf_tail_call Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY`` is currently not supported. +Supported Devices +================= + +It is possible to query which kfunc the particular netdev implements via +netlink. See ``xdp-rx-metadata-features`` attribute set in +``Documentation/netlink/specs/netdev.yaml``. + Example ======= diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst deleted file mode 100644 index 4d01c73685..0000000000 --- a/Documentation/powerpc/associativity.rst +++ /dev/null @@ -1,105 +0,0 @@ -============================ -NUMA resource associativity -============================ - -Associativity represents the groupings of the various platform resources into -domains of substantially similar mean performance relative to resources outside -of that domain. Resources subsets of a given domain that exhibit better -performance relative to each other than relative to other resources subsets -are represented as being members of a sub-grouping domain. This performance -characteristic is presented in terms of NUMA node distance within the Linux kernel. 
-From the platform view, these groups are also referred to as domains. - -PAPR interface currently supports different ways of communicating these resource -grouping details to the OS. These are referred to as Form 0, Form 1 and Form2 -associativity grouping. Form 0 is the oldest format and is now considered deprecated. - -Hypervisor indicates the type/form of associativity used via "ibm,architecture-vec-5 property". -Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1. -A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity -bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used. - -Form 0 ------- -Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE). - -Form 1 ------- -With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity -device tree properties are used to determine the NUMA distance between resource groups/domains. - -The “ibm,associativity” property contains a list of one or more numbers (domainID) -representing the resource’s platform grouping domains. - -The “ibm,associativity-reference-points” property contains a list of one or more numbers -(domainID index) that represents the 1 based ordinal in the associativity lists. -The list of domainID indexes represents an increasing hierarchy of resource grouping. - -ex: -{ primary domainID index, secondary domainID index, tertiary domainID index.. } - -Linux kernel uses the domainID at the primary domainID index as the NUMA node id. -Linux kernel computes NUMA distance between two domains by recursively comparing -if they belong to the same higher-level domains. For mismatch at every higher -level of the resource group, the kernel doubles the NUMA distance between the -comparing domains. - -Form 2 -------- -Form 2 associativity format adds separate device tree properties representing NUMA node distance -thereby making the node distance computation flexible. 
Form 2 also allows flexible primary -domain numbering. With numa distance computation now detached from the index value in -"ibm,associativity-reference-points" property, Form 2 allows a large number of primary domain -ids at the same domainID index representing resource groups of different performance/latency -characteristics. - -Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the -"ibm,architecture-vec-5" property. - -"ibm,numa-lookup-index-table" property contains a list of one or more numbers representing -the domainIDs present in the system. The offset of the domainID in this property is -used as an index while computing numa distance information via "ibm,numa-distance-table". - -prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by -N domainID encoded as with encode-int - -For ex: -"ibm,numa-lookup-index-table" = {4, 0, 8, 250, 252}. The offset of domainID 8 (2) is used when -computing the distance of domain 8 from other domains present in the system. For the rest of -this document, this offset will be referred to as domain distance offset. - -"ibm,numa-distance-table" property contains a list of one or more numbers representing the NUMA -distance between resource groups/domains present in the system. - -prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by -N distance values encoded as with encode-bytes. The max distance value we could encode is 255. -The number N must be equal to the square of m where m is the number of domainIDs in the -numa-lookup-index-table. 
- -For ex: -ibm,numa-lookup-index-table = <3 0 8 40>; -ibm,numa-distace-table = <9>, /bits/ 8 < 10 20 80 20 10 160 80 160 10>; - -:: - - | 0 8 40 - --|------------ - | - 0 | 10 20 80 - | - 8 | 20 10 160 - | - 40| 80 160 10 - -A possible "ibm,associativity" property for resources in node 0, 8 and 40 - -{ 3, 6, 7, 0 } -{ 3, 6, 9, 8 } -{ 3, 6, 7, 40} - -With "ibm,associativity-reference-points" { 0x3 } - -"ibm,lookup-index-table" helps in having a compact representation of distance matrix. -Since domainID can be sparse, the matrix of distances can also be effectively sparse. -With "ibm,lookup-index-table" we can achieve a compact representation of -distance information. diff --git a/Documentation/powerpc/booting.rst b/Documentation/powerpc/booting.rst deleted file mode 100644 index 11aa440f98..0000000000 --- a/Documentation/powerpc/booting.rst +++ /dev/null @@ -1,110 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -DeviceTree Booting ------------------- - -During the development of the Linux/ppc64 kernel, and more specifically, the -addition of new platform types outside of the old IBM pSeries/iSeries pair, it -was decided to enforce some strict rules regarding the kernel entry and -bootloader <-> kernel interfaces, in order to avoid the degeneration that had -become the ppc32 kernel entry point and the way a new platform should be added -to the kernel. The legacy iSeries platform breaks those rules as it predates -this scheme, but no new board support will be accepted in the main tree that -doesn't follow them properly. In addition, since the advent of the arch/powerpc -merged architecture for ppc32 and ppc64, new 32-bit platforms and 32-bit -platforms which move into arch/powerpc will be required to use these rules as -well. - -The main requirement that will be defined in more detail below is the presence -of a device-tree whose format is defined after Open Firmware specification. 
-However, in order to make life easier to embedded board vendors, the kernel -doesn't require the device-tree to represent every device in the system and only -requires some nodes and properties to be present. For example, the kernel does -not require you to create a node for every PCI device in the system. It is a -requirement to have a node for PCI host bridges in order to provide interrupt -routing information and memory/IO ranges, among others. It is also recommended -to define nodes for on chip devices and other buses that don't specifically fit -in an existing OF specification. This creates a great flexibility in the way the -kernel can then probe those and match drivers to device, without having to hard -code all sorts of tables. It also makes it more flexible for board vendors to do -minor hardware upgrades without significantly impacting the kernel code or -cluttering it with special cases. - - -Entry point -~~~~~~~~~~~ - -There is one single entry point to the kernel, at the start -of the kernel image. That entry point supports two calling -conventions: - - a) Boot from Open Firmware. If your firmware is compatible - with Open Firmware (IEEE 1275) or provides an OF compatible - client interface API (support for "interpret" callback of - forth words isn't required), you can enter the kernel with: - - r5 : OF callback pointer as defined by IEEE 1275 - bindings to powerpc. Only the 32-bit client interface - is currently supported - - r3, r4 : address & length of an initrd if any or 0 - - The MMU is either on or off; the kernel will run the - trampoline located in arch/powerpc/kernel/prom_init.c to - extract the device-tree and other information from open - firmware and build a flattened device-tree as described - in b). prom_init() will then re-enter the kernel using - the second method. This trampoline code runs in the - context of the firmware, which is supposed to handle all - exceptions during that time. 
- - b) Direct entry with a flattened device-tree block. This entry - point is called by a) after the OF trampoline and can also be - called directly by a bootloader that does not support the Open - Firmware client interface. It is also used by "kexec" to - implement "hot" booting of a new kernel from a previous - running one. This method is what I will describe in more - details in this document, as method a) is simply standard Open - Firmware, and thus should be implemented according to the - various standard documents defining it and its binding to the - PowerPC platform. The entry point definition then becomes: - - r3 : physical pointer to the device-tree block - (defined in chapter II) in RAM - - r4 : physical pointer to the kernel itself. This is - used by the assembly code to properly disable the MMU - in case you are entering the kernel with MMU enabled - and a non-1:1 mapping. - - r5 : NULL (as to differentiate with method a) - -Note about SMP entry: Either your firmware puts your other -CPUs in some sleep loop or spin loop in ROM where you can get -them out via a soft reset or some other means, in which case -you don't need to care, or you'll have to enter the kernel -with all CPUs. The way to do that with method b) will be -described in a later revision of this document. - -Board supports (platforms) are not exclusive config options. An -arbitrary set of board supports can be built in a single kernel -image. The kernel will "know" what set of functions to use for a -given platform based on the content of the device-tree. Thus, you -should: - - a) add your platform support as a _boolean_ option in - arch/powerpc/Kconfig, following the example of PPC_PSERIES, - PPC_PMAC and PPC_MAPLE. The latter is probably a good - example of a board support to start from. - - b) create your main platform file as - "arch/powerpc/platforms/myplatform/myboard_setup.c" and add it - to the Makefile under the condition of your ``CONFIG_`` - option. 
This file will define a structure of type "ppc_md" - containing the various callbacks that the generic code will - use to get to your platform specific code - -A kernel image may support multiple platforms, but only if the -platforms feature the same core architecture. A single kernel build -cannot support both configurations with Book E and configurations -with classic Powerpc architectures. diff --git a/Documentation/powerpc/bootwrapper.rst b/Documentation/powerpc/bootwrapper.rst deleted file mode 100644 index cdfa2bc842..0000000000 --- a/Documentation/powerpc/bootwrapper.rst +++ /dev/null @@ -1,131 +0,0 @@ -======================== -The PowerPC boot wrapper -======================== - -Copyright (C) Secret Lab Technologies Ltd. - -PowerPC image targets compresses and wraps the kernel image (vmlinux) with -a boot wrapper to make it usable by the system firmware. There is no -standard PowerPC firmware interface, so the boot wrapper is designed to -be adaptable for each kind of image that needs to be built. - -The boot wrapper can be found in the arch/powerpc/boot/ directory. The -Makefile in that directory has targets for all the available image types. -The different image types are used to support all of the various firmware -interfaces found on PowerPC platforms. OpenFirmware is the most commonly -used firmware type on general purpose PowerPC systems from Apple, IBM and -others. U-Boot is typically found on embedded PowerPC hardware, but there -are a handful of other firmware implementations which are also popular. Each -firmware interface requires a different image format. - -The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and -it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target -image. The details of the build system is discussed in the next section. 
-Currently, the following image format targets exist: - - ==================== ======================================================== - cuImage.%: Backwards compatible uImage for older version of - U-Boot (for versions that don't understand the device - tree). This image embeds a device tree blob inside - the image. The boot wrapper, kernel and device tree - are all embedded inside the U-Boot uImage file format - with boot wrapper code that extracts data from the old - bd_info structure and loads the data into the device - tree before jumping into the kernel. - - Because of the series of #ifdefs found in the - bd_info structure used in the old U-Boot interfaces, - cuImages are platform specific. Each specific - U-Boot platform has a different platform init file - which populates the embedded device tree with data - from the platform specific bd_info file. The platform - specific cuImage platform init code can be found in - `arch/powerpc/boot/cuboot.*.c`. Selection of the correct - cuImage init code for a specific board can be found in - the wrapper structure. - - dtbImage.%: Similar to zImage, except device tree blob is embedded - inside the image instead of provided by firmware. The - output image file can be either an elf file or a flat - binary depending on the platform. - - dtbImages are used on systems which do not have an - interface for passing a device tree directly. - dtbImages are similar to simpleImages except that - dtbImages have platform specific code for extracting - data from the board firmware, but simpleImages do not - talk to the firmware at all. - - PlayStation 3 support uses dtbImage. So do Embedded - Planet boards using the PlanetCore firmware. Board - specific initialization code is typically found in a - file named arch/powerpc/boot/.c; but this - can be overridden by the wrapper script. - - simpleImage.%: Firmware independent compressed image that does not - depend on any particular firmware interface and embeds - a device tree blob. 
This image is a flat binary that - can be loaded to any location in RAM and jumped to. - Firmware cannot pass any configuration data to the - kernel with this image type and it depends entirely on - the embedded device tree for all information. - - treeImage.%: Image format used with OpenBIOS firmware found - on some ppc4xx hardware. This image embeds a device - tree blob inside the image. - - uImage: Native image format used by U-Boot. The uImage target - does not add any boot code. It just wraps a compressed - vmlinux in the uImage data structure. This image - requires a version of U-Boot that is able to pass - a device tree to the kernel at boot. If using an older - version of U-Boot, then you need to use a cuImage - instead. - - zImage.%: Image format which does not embed a device tree. - Used by OpenFirmware and other firmware interfaces - which are able to supply a device tree. This image - expects firmware to provide the device tree at boot. - Typically, if you have general purpose PowerPC - hardware then you want this image format. - ==================== ======================================================== - -Image types which embed a device tree blob (simpleImage, dtbImage, treeImage, -and cuImage) all generate the device tree blob from a file in the -arch/powerpc/boot/dts/ directory. The Makefile selects the correct device -tree source based on the name of the target. Therefore, if the kernel is -built with 'make treeImage.walnut', then the build system will use -arch/powerpc/boot/dts/walnut.dts to build treeImage.walnut. - -Two special targets called 'zImage' and 'zImage.initrd' also exist. These -targets build all the default images as selected by the kernel configuration. -Default images are selected by the boot wrapper Makefile -(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look -at the Makefile to see which default image targets are available.
- -How it is built ---------------- -arch/powerpc is designed to support multiplatform kernels, which means -that a single vmlinux image can be booted on many different target boards. -It also means that the boot wrapper must be able to wrap for many kinds of -images on a single build. The design decision was made to not use any -conditional compilation code (#ifdef, etc) in the boot wrapper source code. -All of the boot wrapper pieces are buildable at any time regardless of the -kernel configuration. Building all the wrapper bits on every kernel build -also ensures that obscure parts of the wrapper are at the very least compile -tested in a large variety of environments. - -The wrapper is adapted for different image types at link time by linking in -just the wrapper bits that are appropriate for the image type. The 'wrapper -script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and -is responsible for selecting the correct wrapper bits for the image type. -The arguments are well documented in the script's comment block, so they -are not repeated here. However, it is worth mentioning that the script -uses the -p (platform) argument as the main method of deciding which wrapper -bits to compile in. Look for the large 'case "$platform" in' block in the -middle of the script. This is also the place where platform specific fixups -can be selected by changing the link order. - -In particular, care should be taken when working with cuImages. cuImage -wrapper bits are very board specific and care should be taken to make sure -the target you are trying to build is supported by the wrapper bits. 
diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst deleted file mode 100644 index eb7e60649b..0000000000 --- a/Documentation/powerpc/cpu_families.rst +++ /dev/null @@ -1,237 +0,0 @@ -============ -CPU Families -============ - -This document tries to summarise some of the different cpu families that exist -and are supported by arch/powerpc. - - -Book3S (aka sPAPR) ------------------- - -- Hash MMU (except 603 and e300) -- Radix MMU (POWER9 and later) -- Software loaded TLB (603 and e300) -- Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600) -- Mix of 32 & 64 bit:: - - +--------------+ +----------------+ - | Old POWER | --------------> | RS64 (threads) | - +--------------+ +----------------+ - | - | - v - +--------------+ +----------------+ +------+ - | 601 | --------------> | 603 | ---> | e300 | - +--------------+ +----------------+ +------+ - | | - | | - v v - +--------------+ +-----+ +----------------+ +-------+ - | 604 | | 755 | <--- | 750 (G3) | ---> | 750CX | - +--------------+ +-----+ +----------------+ +-------+ - | | | - | | | - v v v - +--------------+ +----------------+ +-------+ - | 620 (64 bit) | | 7400 | | 750CL | - +--------------+ +----------------+ +-------+ - | | | - | | | - v v v - +--------------+ +----------------+ +-------+ - | POWER3/630 | | 7410 | | 750FX | - +--------------+ +----------------+ +-------+ - | | - | | - v v - +--------------+ +----------------+ - | POWER3+ | | 7450 | - +--------------+ +----------------+ - | | - | | - v v - +--------------+ +----------------+ - | POWER4 | | 7455 | - +--------------+ +----------------+ - | | - | | - v v - +--------------+ +-------+ +----------------+ - | POWER4+ | --> | 970 | | 7447 | - +--------------+ +-------+ +----------------+ - | | | - | | | - v v v - +--------------+ +-------+ +----------------+ - | POWER5 | | 970FX | | 7448 | - +--------------+ +-------+ +----------------+ - | | | - | | | - v v v - +--------------+ +-------+ 
+----------------+ - | POWER5+ | | 970MP | | e600 | - +--------------+ +-------+ +----------------+ - | - | - v - +--------------+ - | POWER5++ | - +--------------+ - | - | - v - +--------------+ +-------+ - | POWER6 | <-?-> | Cell | - +--------------+ +-------+ - | - | - v - +--------------+ - | POWER7 | - +--------------+ - | - | - v - +--------------+ - | POWER7+ | - +--------------+ - | - | - v - +--------------+ - | POWER8 | - +--------------+ - | - | - v - +--------------+ - | POWER9 | - +--------------+ - | - | - v - +--------------+ - | POWER10 | - +--------------+ - - - +---------------+ - | PA6T (64 bit) | - +---------------+ - - -IBM BookE ---------- - -- Software loaded TLB. -- All 32 bit:: - - +--------------+ - | 401 | - +--------------+ - | - | - v - +--------------+ - | 403 | - +--------------+ - | - | - v - +--------------+ - | 405 | - +--------------+ - | - | - v - +--------------+ - | 440 | - +--------------+ - | - | - v - +--------------+ +----------------+ - | 450 | --> | BG/P | - +--------------+ +----------------+ - | - | - v - +--------------+ - | 460 | - +--------------+ - | - | - v - +--------------+ - | 476 | - +--------------+ - - -Motorola/Freescale 8xx ----------------------- - -- Software loaded with hardware assist. -- All 32 bit:: - - +-------------+ - | MPC8xx Core | - +-------------+ - - -Freescale BookE ---------------- - -- Software loaded TLB. -- e6500 adds HW loaded indirect TLB entries. 
-- Mix of 32 & 64 bit:: - - +--------------+ - | e200 | - +--------------+ - - - +--------------------------------+ - | e500 | - +--------------------------------+ - | - | - v - +--------------------------------+ - | e500v2 | - +--------------------------------+ - | - | - v - +--------------------------------+ - | e500mc (Book3e) | - +--------------------------------+ - | - | - v - +--------------------------------+ - | e5500 (64 bit) | - +--------------------------------+ - | - | - v - +--------------------------------+ - | e6500 (HW TLB) (Multithreaded) | - +--------------------------------+ - - -IBM A2 core ------------ - -- Book3E, software loaded TLB + HW loaded indirect TLB entries. -- 64 bit:: - - +--------------+ +----------------+ - | A2 core | --> | WSP | - +--------------+ +----------------+ - | - | - v - +--------------+ - | BG/Q | - +--------------+ diff --git a/Documentation/powerpc/cpu_features.rst b/Documentation/powerpc/cpu_features.rst deleted file mode 100644 index b7bcdd2f41..0000000000 --- a/Documentation/powerpc/cpu_features.rst +++ /dev/null @@ -1,60 +0,0 @@ -============ -CPU Features -============ - -Hollis Blanchard -5 Jun 2002 - -This document describes the system (including self-modifying code) used in the -PPC Linux kernel to support a variety of PowerPC CPUs without requiring -compile-time selection. - -Early in the boot process the ppc32 kernel detects the current CPU type and -chooses a set of features accordingly. Some examples include Altivec support, -split instruction and data caches, and if the CPU supports the DOZE and NAP -sleep modes. - -Detection of the feature set is simple. A list of processors can be found in -arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with -each value in the list. If a match is found, the cpu_features of cur_cpu_spec -is assigned to the feature bitmask for this processor and a __setup_cpu -function is called. 
- -C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a -particular feature bit. This is done in quite a few places, for example -in ppc_setup_l2cr(). - -Implementing cpufeatures in assembly is a little more involved. There are -several paths that are performance-critical and would suffer if an array -index, structure dereference, and conditional branch were added. To avoid the -performance penalty but still allow for runtime (rather than compile-time) CPU -selection, unused code is replaced by 'nop' instructions. This nop'ing is -based on CPU 0's capabilities, so a multi-processor system with non-identical -processors will not work (but such a system would likely have other problems -anyways). - -After detecting the processor type, the kernel patches out sections of code -that shouldn't be used by writing nop's over it. Using cpufeatures requires -just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S -transfer_to_handler:: - - #ifdef CONFIG_ALTIVEC - BEGIN_FTR_SECTION - mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */ - stw r22,THREAD_VRSAVE(r23) - END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - #endif /* CONFIG_ALTIVEC */ - -If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both -instructions are replaced with nop's. - -The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET -and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in -cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros -should be used in the majority of cases. - -The END_FTR_SECTION macros are implemented by storing information about this -code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups -(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in -__ftr_fixup, and if the required feature is not present it will loop writing -nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION. 
diff --git a/Documentation/powerpc/cxl.rst b/Documentation/powerpc/cxl.rst deleted file mode 100644 index d2d7705761..0000000000 --- a/Documentation/powerpc/cxl.rst +++ /dev/null @@ -1,469 +0,0 @@ -==================================== -Coherent Accelerator Interface (CXL) -==================================== - -Introduction -============ - - The coherent accelerator interface is designed to allow the - coherent connection of accelerators (FPGAs and other devices) to a - POWER system. These devices need to adhere to the Coherent - Accelerator Interface Architecture (CAIA). - - IBM refers to this as the Coherent Accelerator Processor Interface - or CAPI. In the kernel it's referred to by the name CXL to avoid - confusion with the ISDN CAPI subsystem. - - Coherent in this context means that the accelerator and CPUs can - both access system memory directly and with the same effective - addresses. - - -Hardware overview -================= - - :: - - POWER8/9 FPGA - +----------+ +---------+ - | | | | - | CPU | | AFU | - | | | | - | | | | - | | | | - +----------+ +---------+ - | PHB | | | - | +------+ | PSL | - | | CAPP |<------>| | - +---+------+ PCIE +---------+ - - The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP) - unit which is part of the PCIe Host Bridge (PHB). This is managed - by Linux by calls into OPAL. Linux doesn't directly program the - CAPP. - - The FPGA (or coherently attached device) consists of two parts. - The POWER Service Layer (PSL) and the Accelerator Function Unit - (AFU). The AFU is used to implement specific functionality behind - the PSL. The PSL, among other things, provides memory address - translation services to allow each AFU direct access to userspace - memory. - - The AFU is the core part of the accelerator (eg. the compression, - crypto etc function). The kernel has no knowledge of the function - of the AFU. Only userspace interacts directly with the AFU. 
- - The PSL provides the translation and interrupt services that the - AFU needs. This is what the kernel interacts with. For example, if - the AFU needs to read a particular effective address, it sends - that address to the PSL, the PSL then translates it, fetches the - data from memory and returns it to the AFU. If the PSL has a - translation miss, it interrupts the kernel and the kernel services - the fault. The context to which this fault is serviced is based on - who owns that acceleration function. - - - POWER8 and PSL Version 8 are compliant to the CAIA Version 1.0. - - POWER9 and PSL Version 9 are compliant to the CAIA Version 2.0. - - This PSL Version 9 provides new features such as: - - * Interaction with the nest MMU on the P9 chip. - * Native DMA support. - * Supports sending ASB_Notify messages for host thread wakeup. - * Supports Atomic operations. - * etc. - - Cards with a PSL9 won't work on a POWER8 system and cards with a - PSL8 won't work on a POWER9 system. - -AFU Modes -========= - - There are two programming modes supported by the AFU. Dedicated - and AFU directed. AFU may support one or both modes. - - When using dedicated mode only one MMU context is supported. In - this mode, only one userspace process can use the accelerator at - a time. - - When using AFU directed mode, up to 16K simultaneous contexts can - be supported. This means up to 16K simultaneous userspace - applications may use the accelerator (although specific AFUs may - support fewer). In this mode, the AFU sends a 16 bit context ID - with each of its requests. This tells the PSL which context is - associated with each operation. If the PSL can't translate an - operation, the ID can also be accessed by the kernel so it can - determine the userspace context associated with an operation. - - -MMIO space -========== - - A portion of the accelerator MMIO space can be directly mapped - from the AFU to userspace. Either the whole space can be mapped or - just a per context portion.
The hardware is self describing, hence - the kernel can determine the offset and size of the per context - portion. - - -Interrupts -========== - - AFUs may generate interrupts that are destined for userspace. These - are received by the kernel as hardware interrupts and passed onto - userspace by a read syscall documented below. - - Data storage faults and error interrupts are handled by the kernel - driver. - - -Work Element Descriptor (WED) -============================= - - The WED is a 64-bit parameter passed to the AFU when a context is - started. Its format is up to the AFU hence the kernel has no - knowledge of what it represents. Typically it will be the - effective address of a work queue or status block where the AFU - and userspace can share control and status information. - - - - -User API -======== - -1. AFU character devices -^^^^^^^^^^^^^^^^^^^^^^^^ - - For AFUs operating in AFU directed mode, two character device - files will be created. /dev/cxl/afu0.0m will correspond to a - master context and /dev/cxl/afu0.0s will correspond to a slave - context. Master contexts have access to the full MMIO space an - AFU provides. Slave contexts have access to only the per process - MMIO space an AFU provides. - - For AFUs operating in dedicated process mode, the driver will - only create a single character device per AFU called - /dev/cxl/afu0.0d. This will have access to the entire MMIO space - that the AFU provides (like master contexts in AFU directed). - - The types described below are defined in include/uapi/misc/cxl.h - - The following file operations are supported on both slave and - master devices. - - A userspace library libcxl is available here: - - https://github.com/ibm-capi/libcxl - - This provides a C interface to this kernel API. - -open ----- - - Opens the device and allocates a file descriptor to be used with - the rest of the API. - - A dedicated mode AFU only has one context and only allows the - device to be opened once. 
- - An AFU directed mode AFU can have many contexts, the device can be - opened once for each context that is available. - - When all available contexts are allocated the open call will fail - and return -ENOSPC. - - Note: - IRQs need to be allocated for each context, which may limit - the number of contexts that can be created, and therefore - how many times the device can be opened. The POWER8 CAPP - supports 2040 IRQs and 3 are used by the kernel, so 2037 are - left. If 1 IRQ is needed per context, then only 2037 - contexts can be allocated. If 4 IRQs are needed per context, - then only 2037/4 = 509 contexts can be allocated. - - -ioctl ------ - - CXL_IOCTL_START_WORK: - Starts the AFU context and associates it with the current - process. Once this ioctl is successfully executed, all memory - mapped into this process is accessible to this AFU context - using the same effective addresses. No additional calls are - required to map/unmap memory. The AFU memory context will be - updated as userspace allocates and frees memory. This ioctl - returns once the AFU context is started. - - Takes a pointer to a struct cxl_ioctl_start_work - - :: - - struct cxl_ioctl_start_work { - __u64 flags; - __u64 work_element_descriptor; - __u64 amr; - __s16 num_interrupts; - __s16 reserved1; - __s32 reserved2; - __u64 reserved3; - __u64 reserved4; - __u64 reserved5; - __u64 reserved6; - }; - - flags: - Indicates which optional fields in the structure are - valid. - - work_element_descriptor: - The Work Element Descriptor (WED) is a 64-bit argument - defined by the AFU. Typically this is an effective - address pointing to an AFU specific structure - describing what work to perform. - - amr: - Authority Mask Register (AMR), same as the powerpc - AMR. This field is only used by the kernel when the - corresponding CXL_START_WORK_AMR value is specified in - flags. If not specified the kernel will use a default - value of 0. - - num_interrupts: - Number of userspace interrupts to request. 
This field - is only used by the kernel when the corresponding - CXL_START_WORK_NUM_IRQS value is specified in flags. - If not specified the minimum number required by the - AFU will be allocated. The min and max number can be - obtained from sysfs. - - reserved fields: - For ABI padding and future extensions - - CXL_IOCTL_GET_PROCESS_ELEMENT: - Get the current context id, also known as the process element. - The value is returned from the kernel as a __u32. - - -mmap ----- - - An AFU may have an MMIO space to facilitate communication with the - AFU. If it does, the MMIO space can be accessed via mmap. The size - and contents of this area are specific to the particular AFU. The - size can be discovered via sysfs. - - In AFU directed mode, master contexts are allowed to map all of - the MMIO space and slave contexts are allowed to only map the per - process MMIO space associated with the context. In dedicated - process mode the entire MMIO space can always be mapped. - - This mmap call must be done after the START_WORK ioctl. - - Care should be taken when accessing MMIO space. Only 32 and 64-bit - accesses are supported by POWER8. Also, the AFU will be designed - with a specific endianness, so all MMIO accesses should consider - endianness (recommend endian(3) variants like: le64toh(), - be64toh() etc). These endian issues equally apply to shared memory - queues the WED may describe. - - -read ----- - - Reads events from the AFU. Blocks if no events are pending - (unless O_NONBLOCK is supplied). Returns -EIO in the case of an - unrecoverable error or if the card is removed. - - read() will always return an integral number of events. - - The buffer passed to read() must be at least 4K bytes. 
- - The result of the read will be a buffer of one or more events, - each event is of type struct cxl_event, of varying size:: - - struct cxl_event { - struct cxl_event_header header; - union { - struct cxl_event_afu_interrupt irq; - struct cxl_event_data_storage fault; - struct cxl_event_afu_error afu_error; - }; - }; - - The struct cxl_event_header is defined as - - :: - - struct cxl_event_header { - __u16 type; - __u16 size; - __u16 process_element; - __u16 reserved1; - }; - - type: - This defines the type of event. The type determines how - the rest of the event is structured. These types are - described below and defined by enum cxl_event_type. - - size: - This is the size of the event in bytes including the - struct cxl_event_header. The start of the next event can - be found at this offset from the start of the current - event. - - process_element: - Context ID of the event. - - reserved field: - For future extensions and padding. - - If the event type is CXL_EVENT_AFU_INTERRUPT then the event - structure is defined as - - :: - - struct cxl_event_afu_interrupt { - __u16 flags; - __u16 irq; /* Raised AFU interrupt number */ - __u32 reserved1; - }; - - flags: - These flags indicate which optional fields are present - in this struct. Currently all fields are mandatory. - - irq: - The IRQ number sent by the AFU. - - reserved field: - For future extensions and padding. - - If the event type is CXL_EVENT_DATA_STORAGE then the event - structure is defined as - - :: - - struct cxl_event_data_storage { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 addr; - __u64 dsisr; - __u64 reserved3; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are mandatory. - - address: - The address that the AFU unsuccessfully attempted to - access. Valid accesses will be handled transparently by the - kernel but invalid accesses will generate this event. 
- - dsisr: - This field gives information on the type of fault. It is a - copy of the DSISR from the PSL hardware when the address - fault occurred. The form of the DSISR is as defined in the - CAIA. - - reserved fields: - For future extensions - - If the event type is CXL_EVENT_AFU_ERROR then the event structure - is defined as - - :: - - struct cxl_event_afu_error { - __u16 flags; - __u16 reserved1; - __u32 reserved2; - __u64 error; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are mandatory. - - error: - Error status from the AFU. Defined by the AFU. - - reserved fields: - For future extensions and padding - - -2. Card character device (powerVM guest only) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - In a powerVM guest, an extra character device is created for the - card. The device is only used to write (flash) a new image on the - FPGA accelerator. Once the image is written and verified, the - device tree is updated and the card is reset to reload the updated - image. - -open ----- - - Opens the device and allocates a file descriptor to be used with - the rest of the API. The device can only be opened once. - -ioctl ------ - -CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE: - Starts and controls flashing a new FPGA image. Partial - reconfiguration is not supported (yet), so the image must contain - a copy of the PSL and AFU(s). Since an image can be quite large, - the caller may have to iterate, splitting the image into smaller - chunks. - - Takes a pointer to a struct cxl_adapter_image:: - - struct cxl_adapter_image { - __u64 flags; - __u64 data; - __u64 len_data; - __u64 len_image; - __u64 reserved1; - __u64 reserved2; - __u64 reserved3; - __u64 reserved4; - }; - - flags: - These flags indicate which optional fields are present in - this struct. Currently all fields are mandatory. - - data: - Pointer to a buffer with part of the image to write to the - card.
- - len_data: - Size of the buffer pointed to by data. - - len_image: - Full size of the image. - - -Sysfs Class -=========== - - A cxl sysfs class is added under /sys/class/cxl to facilitate - enumeration and tuning of the accelerators. Its layout is - described in Documentation/ABI/testing/sysfs-class-cxl - - -Udev rules -========== - - The following udev rules could be used to create a symlink to the - most logical chardev to use in any programming mode (afuX.Yd for - dedicated, afuX.Ys for afu directed), since the API is virtually - identical for each:: - - SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b" - SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \ - KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b" diff --git a/Documentation/powerpc/cxlflash.rst b/Documentation/powerpc/cxlflash.rst deleted file mode 100644 index cea67931b3..0000000000 --- a/Documentation/powerpc/cxlflash.rst +++ /dev/null @@ -1,433 +0,0 @@ -================================ -Coherent Accelerator (CXL) Flash -================================ - -Introduction -============ - - The IBM Power architecture provides support for CAPI (Coherent - Accelerator Processor Interface), which is available to certain PCIe slots - on Power 8 systems. CAPI can be thought of as a special tunneling - protocol through PCIe that allows PCIe adapters to look like special - purpose co-processors which can read or write an application's - memory and generate page faults. As a result, the host interface to - an adapter running in CAPI mode does not require the data buffers to - be mapped to the device's memory (IOMMU bypass) nor does it require - memory to be pinned. - - On Linux, Coherent Accelerator (CXL) kernel services present CAPI - devices as a PCI device by implementing a virtual PCI host bridge. - This abstraction simplifies the infrastructure and programming - model, allowing for drivers to look similar to other native PCI - device drivers.
- - CXL provides a mechanism by which user space applications can - directly talk to a device (network or storage) bypassing the typical - kernel/device driver stack. The CXL Flash Adapter Driver enables a - user space application direct access to Flash storage. - - The CXL Flash Adapter Driver is a kernel module that sits in the - SCSI stack as a low level device driver (below the SCSI disk and - protocol drivers) for the IBM CXL Flash Adapter. This driver is - responsible for the initialization of the adapter, setting up the - special path for user space access, and performing error recovery. It - communicates directly with the Flash Accelerator Functional Unit (AFU) - as described in Documentation/powerpc/cxl.rst. - - The cxlflash driver supports two, mutually exclusive, modes of - operation at the device (LUN) level: - - - Any flash device (LUN) can be configured to be accessed as a - regular disk device (i.e.: /dev/sdc). This is the default mode. - - - Any flash device (LUN) can be configured to be accessed from - user space with a special block library. This mode further - specifies the means of accessing the device and provides for - either raw access to the entire LUN (referred to as direct - or physical LUN access) or access to a kernel/AFU-mediated - partition of the LUN (referred to as virtual LUN access). The - segmentation of a disk device into virtual LUNs is assisted - by special translation services provided by the Flash AFU. - -Overview -======== - - The Coherent Accelerator Interface Architecture (CAIA) introduces a - concept of a master context. A master typically has special privileges - granted to it by the kernel or hypervisor allowing it to perform AFU - wide management and control. The master may or may not be involved - directly in each user I/O, but at the minimum is involved in the - initial setup before the user application is allowed to send requests - directly to the AFU.
- - The CXL Flash Adapter Driver establishes a master context with the - AFU. It uses memory mapped I/O (MMIO) for this control and setup. The - Adapter Problem Space Memory Map looks like this:: - - +-------------------------------+ - | 512 * 64 KB User MMIO | - | (per context) | - | User Accessible | - +-------------------------------+ - | 512 * 128 B per context | - | Provisioning and Control | - | Trusted Process accessible | - +-------------------------------+ - | 64 KB Global | - | Trusted Process accessible | - +-------------------------------+ - - This driver configures itself into the SCSI software stack as an - adapter driver. The driver is the only entity that is considered a - Trusted Process to program the Provisioning and Control and Global - areas in the MMIO Space shown above. The master context driver - discovers all LUNs attached to the CXL Flash adapter and instantiates - scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN - seen from each path. - - Once these scsi block devices are instantiated, an application - written to a specification provided by the block library may get - access to the Flash from user space (without requiring a system call). - - This master context driver also provides a series of ioctls for this - block library to enable this user space access. The driver supports - two modes for accessing the block device. - - The first mode is called a virtual mode. In this mode a single scsi - block device (/dev/sdb) may be carved up into any number of distinct - virtual LUNs. The virtual LUNs may be resized as long as the sum of - the sizes of all the virtual LUNs, along with the meta-data associated - with it does not exceed the physical capacity. - - The second mode is called the physical mode. In this mode a single - block device (/dev/sdb) may be opened directly by the block library - and the entire space for the LUN is available to the application. - - Only the physical mode provides persistence of the data. i.e. 
The - data written to the block device will survive application exit and - restart and also reboot. The virtual LUNs do not persist (i.e. do - not survive after the application terminates or the system reboots). - - -Block library API -================= - - Applications intending to get access to the CXL Flash from user - space should use the block library, as it abstracts the details of - interfacing directly with the cxlflash driver that are necessary for - performing administrative actions (i.e.: setup, tear down, resize). - The block library can be thought of as a 'user' of services, - implemented as IOCTLs, that are provided by the cxlflash driver - specifically for devices (LUNs) operating in user space access - mode. While it is not a requirement that applications understand - the interface between the block library and the cxlflash driver, - a high-level overview of each supported service (IOCTL) is provided - below. - - The block library can be found on GitHub: - http://github.com/open-power/capiflash - - -CXL Flash Driver LUN IOCTLs -=========================== - - Users, such as the block library, that wish to interface with a flash - device (LUN) via user space access need to use the services provided - by the cxlflash driver. As these services are implemented as ioctls, - a file descriptor handle must first be obtained in order to establish - the communication channel between a user and the kernel. This file - descriptor is obtained by opening the device special file associated - with the scsi disk device (/dev/sdb) that was created during LUN - discovery. As per the location of the cxlflash driver within the - SCSI protocol stack, this open is actually not seen by the cxlflash - driver. Upon successful open, the user receives a file descriptor - (herein referred to as fd1) that should be used for issuing the - subsequent ioctls listed below. 
- - The structure definitions for these IOCTLs are available in: - uapi/scsi/cxlflash_ioctl.h - -DK_CXLFLASH_ATTACH ------------------- - - This ioctl obtains, initializes, and starts a context using the CXL - kernel services. These services specify a context id (u16) by which - to uniquely identify the context and its allocated resources. The - services additionally provide a second file descriptor (herein - referred to as fd2) that is used by the block library to initiate - memory mapped I/O (via mmap()) to the CXL flash device and poll for - completion events. This file descriptor is intentionally installed by - this driver and not the CXL kernel services to allow for intermediary - notification and access in the event of a non-user-initiated close(), - such as a killed process. This design point is described in further - detail in the description for the DK_CXLFLASH_DETACH ioctl. - - There are a few important aspects regarding the "tokens" (context id - and fd2) that are provided back to the user: - - - These tokens are only valid for the process under which they - were created. The child of a forked process cannot continue - to use the context id or file descriptor created by its parent - (see DK_CXLFLASH_VLUN_CLONE for further details). - - - These tokens are only valid for the lifetime of the context and - the process under which they were created. Once either is - destroyed, the tokens are to be considered stale and subsequent - usage will result in errors. - - - A valid adapter file descriptor (fd2 >= 0) is only returned on - the initial attach for a context. Subsequent attaches to an - existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present) - do not provide the adapter file descriptor as it was previously - made known to the application. - - - When a context is no longer needed, the user shall detach from - the context via the DK_CXLFLASH_DETACH ioctl. 
When this ioctl - returns with a valid adapter file descriptor and the return flag - DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ - close the adapter file descriptor following a successful detach. - - - When this ioctl returns with a valid fd2 and the return flag - DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ - close fd2 in the following circumstances: - - + Following a successful detach of the last user of the context - + Following a successful recovery on the context's original fd2 - + In the child process of a fork(), following a clone ioctl, - on the fd2 associated with the source context - - - At any time, a close on fd2 will invalidate the tokens. Applications - should exercise caution to only close fd2 when appropriate (outlined - in the previous bullet) to avoid premature loss of I/O. - -DK_CXLFLASH_USER_DIRECT ------------------------ - This ioctl is responsible for transitioning the LUN to direct - (physical) mode access and configuring the AFU for direct access from - user space on a per-context basis. Additionally, the block size and - last logical block address (LBA) are returned to the user. - - As mentioned previously, when operating in user space access mode, - LUNs may be accessed in whole or in part. Only one mode is allowed - at a time and if one mode is active (outstanding references exist), - requests to use the LUN in a different mode are denied. - - The AFU is configured for direct access from user space by adding an - entry to the AFU's resource handle table. The index of the entry is - treated as a resource handle that is returned to the user. The user - is then able to use the handle to reference the LUN during I/O. - -DK_CXLFLASH_USER_VIRTUAL ------------------------- - This ioctl is responsible for transitioning the LUN to virtual mode - of access and configuring the AFU for virtual access from user space - on a per-context basis. 
Additionally, the block size and last logical - block address (LBA) are returned to the user. - - As mentioned previously, when operating in user space access mode, - LUNs may be accessed in whole or in part. Only one mode is allowed - at a time and if one mode is active (outstanding references exist), - requests to use the LUN in a different mode are denied. - - The AFU is configured for virtual access from user space by adding - an entry to the AFU's resource handle table. The index of the entry - is treated as a resource handle that is returned to the user. The - user is then able to use the handle to reference the LUN during I/O. - - By default, the virtual LUN is created with a size of 0. The user - would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to grow - the virtual LUN to a desired size. To avoid having to perform this - resize for the initial creation of the virtual LUN, the user has the - option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL - ioctl, such that when success is returned to the user, the - resource handle that is provided is already referencing provisioned - storage. This is reflected by the last LBA being a non-zero value. - - When a LUN is accessible from more than one port, this ioctl will - return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This - provides the user with a hint that I/O can be retried in the event - of an I/O error as the LUN can be reached over multiple paths. - -DK_CXLFLASH_VLUN_RESIZE ------------------------ - This ioctl is responsible for resizing a previously created virtual - LUN and will fail if invoked upon a LUN that is not in virtual - mode. Upon success, an updated last LBA is returned to the user - indicating the new size of the virtual LUN associated with the - resource handle. - - The partitioning of virtual LUNs is jointly mediated by the cxlflash - driver and the AFU.
An allocation table is kept for each LUN that is - operating in the virtual mode and used to program a LUN translation - table that the AFU references when provided with a resource handle. - - This ioctl can return -EAGAIN if an AFU sync operation takes too long. - In addition to returning a failure to user, cxlflash will also schedule - an asynchronous AFU reset. Should the user choose to retry the operation, - it is expected to succeed. If this ioctl fails with -EAGAIN, the user - can either retry the operation or treat it as a failure. - -DK_CXLFLASH_RELEASE -------------------- - This ioctl is responsible for releasing a previously obtained - reference to either a physical or virtual LUN. This can be - thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or - DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle - is no longer valid and the entry in the resource handle table is - made available to be used again. - - As part of the release process for virtual LUNs, the virtual LUN - is first resized to 0 to clear out and free the translation tables - associated with the virtual LUN reference. - -DK_CXLFLASH_DETACH ------------------- - This ioctl is responsible for unregistering a context with the - cxlflash driver and release outstanding resources that were - not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon - success, all "tokens" which had been provided to the user from the - DK_CXLFLASH_ATTACH onward are no longer valid. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ close the fd2 associated with the context - following the detach of the final user of the context. - -DK_CXLFLASH_VLUN_CLONE ----------------------- - This ioctl is responsible for cloning a previously created - context to a more recently created context. It exists solely to - support maintaining user space access to storage after a process - forks. 
Upon success, the child process (which invoked the ioctl) - will have access to the same LUNs via the same resource handle(s) - as the parent, but under a different context. - - Context sharing across processes is not supported with CXL and - therefore each fork must be met with establishing a new context - for the child process. This ioctl simplifies the state management - and playback required by a user in such a scenario. When a process - forks, the child process can clone the parent's context by first creating - a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to - perform the clone from the parent to the child. - - The clone itself is fairly simple. The resource handle and lun - translation tables are copied from the parent context to the child's - and then synced with the AFU. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ close the fd2 associated with the source - context (still resident/accessible in the parent process) following the - clone. This is to avoid a stale entry in the file descriptor table of the - child process. - - This ioctl can return -EAGAIN if an AFU sync operation takes too long. - In addition to returning a failure to user, cxlflash will also schedule - an asynchronous AFU reset. Should the user choose to retry the operation, - it is expected to succeed. If this ioctl fails with -EAGAIN, the user - can either retry the operation or treat it as a failure. - -DK_CXLFLASH_VERIFY ------------------- - This ioctl is used to detect various changes such as the capacity of - the disk changing, the number of LUNs visible changing, etc. In cases - where the changes affect the application (such as a LUN resize), the - cxlflash driver will report the changed state to the application. - - The user calls in when they want to validate that a LUN hasn't been - changed in response to a check condition.
As the user is operating out - of band from the kernel, they will see these types of events without - the kernel's knowledge. When encountered, the user's architected - behavior is to call in to this ioctl, indicating what they want to - verify and passing along any appropriate information. For now, only - verifying a LUN change (ie: size different) with sense data is - supported. - -DK_CXLFLASH_RECOVER_AFU ------------------------ - This ioctl is used to drive recovery (if such an action is warranted) - of a specified user context. Any state associated with the user context - is re-established upon successful recovery. - - User contexts are put into an error condition when the device needs to - be reset or is terminating. Users are notified of this error condition - by seeing all 0xF's on an MMIO read. Upon encountering this, the - architected behavior for a user is to call into this ioctl to recover - their context. A user may also call into this ioctl at any time to - check if the device is operating normally. If a failure is returned - from this ioctl, the user is expected to gracefully clean up their - context via release/detach ioctls. Until they do, the context they - hold is not relinquished. The user may also optionally exit the process - at which time the context/resources they held will be freed as part of - the release fop. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ unmap and close the fd2 associated with the - original context following this ioctl returning success and indicating that - the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET). - -DK_CXLFLASH_MANAGE_LUN ----------------------- - This ioctl is used to switch a LUN from a mode where it is available - for file-system access (legacy), to a mode where it is set aside for - exclusive user space access (superpipe). 
In case a LUN is visible - across multiple ports and adapters, this ioctl is used to uniquely - identify each LUN by its World Wide Node Name (WWNN). - - -CXL Flash Driver Host IOCTLs -============================ - - Each host adapter instance that is supported by the cxlflash driver - has a special character device associated with it to enable a set of - host management function. These character devices are hosted in a - class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`. - - Applications can be written to perform various functions using the - host ioctl APIs below. - - The structure definitions for these IOCTLs are available in: - uapi/scsi/cxlflash_ioctl.h - -HT_CXLFLASH_LUN_PROVISION -------------------------- - This ioctl is used to create and delete persistent LUNs on cxlflash - devices that lack an external LUN management interface. It is only - valid when used with AFUs that support the LUN provision capability. - - When sufficient space is available, LUNs can be created by specifying - the target port to host the LUN and a desired size in 4K blocks. Upon - success, the LUN ID and WWID of the created LUN will be returned and - the SCSI bus can be scanned to detect the change in LUN topology. Note - that partial allocations are not supported. Should a creation fail due - to a space issue, the target port can be queried for its current LUN - geometry. - - To remove a LUN, the device must first be disassociated from the Linux - SCSI subsystem. The LUN deletion can then be initiated by specifying a - target port and LUN ID. Upon success, the LUN geometry associated with - the port will be updated to reflect new number of provisioned LUNs and - available capacity. 
- - To query the LUN geometry of a port, the target port is specified and - upon success, the following information is presented: - - - Maximum number of provisioned LUNs allowed for the port - - Current number of provisioned LUNs for the port - - Maximum total capacity of provisioned LUNs for the port (4K blocks) - - Current total capacity of provisioned LUNs for the port (4K blocks) - - With this information, the number of available LUNs and capacity can be - calculated. - -HT_CXLFLASH_AFU_DEBUG ---------------------- - This ioctl is used to debug AFUs by supporting a command pass-through - interface. It is only valid when used with AFUs that support the AFU - debug capability. - - With exception of buffer management, AFU debug commands are opaque to - cxlflash and treated as pass-through. For debug commands that do require - data transfer, the user supplies an adequately sized data buffer and must - specify the data transfer direction with respect to the host. There is a - maximum transfer size of 256K imposed. Note that partial read completions - are not supported - when errors are experienced with a host read data - transfer, the data buffer is not copied back to the user. diff --git a/Documentation/powerpc/dawr-power9.rst b/Documentation/powerpc/dawr-power9.rst deleted file mode 100644 index 310f2e0cea..0000000000 --- a/Documentation/powerpc/dawr-power9.rst +++ /dev/null @@ -1,101 +0,0 @@ -===================== -DAWR issues on POWER9 -===================== - -On older POWER9 processors, the Data Address Watchpoint Register (DAWR) can -cause a checkstop if it points to cache inhibited (CI) memory. Currently Linux -has no way to distinguish CI memory when configuring the DAWR, so on affected -systems, the DAWR is disabled. - -Affected processor revisions -============================ - -This issue is only present on processors prior to v2.3.
The revision can be -found in /proc/cpuinfo:: - - processor : 0 - cpu : POWER9, altivec supported - clock : 3800.000000MHz - revision : 2.3 (pvr 004e 1203) - -On a system with the issue, the DAWR is disabled as detailed below. - -Technical Details: -================== - -DAWR has 5 different ways of being set. -1) ptrace -2) h_set_mode(DAWR) -3) h_set_dabr() -4) kvmppc_set_one_reg() -5) xmon - -For ptrace, we now advertise zero breakpoints on POWER9 via the -PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to -software emulation of the watchpoint (which is slow). - -h_set_mode(DAWR) and h_set_dabr() will now return an error to the -guest on a POWER9 host. Current Linux guests ignore this error, so -they will silently not get the DAWR. - -kvmppc_set_one_reg() will store the value in the vcpu but won't -actually set it on POWER9 hardware. This is done so we don't break -migration from POWER8 to POWER9, at the cost of silently losing the -DAWR on the migration. - -For xmon, the 'bd' command will return an error on P9. - -Consequences for users -====================== - -For GDB watchpoints (ie 'watch' command) on POWER9 bare metal, GDB -will accept the command. Unfortunately since there is no hardware -support for the watchpoint, GDB will software emulate the watchpoint -making it run very slowly. - -The same will also be true for any guests started on a POWER9 -host. The watchpoint will fail and GDB will fall back to software -emulation. - -If a guest is started on a POWER8 host, GDB will accept the watchpoint -and configure the hardware to use the DAWR. This will run at full -speed since it can use the hardware emulation. Unfortunately if this -guest is migrated to a POWER9 host, the watchpoint will be lost on the -POWER9. Loads and stores to the watchpoint locations will not be -trapped in GDB. The watchpoint is remembered, so if the guest is -migrated back to the POWER8 host, it will start working again.
- -Force enabling the DAWR -======================= -Kernels (since ~v5.2) have an option to force enable the DAWR via:: - - echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous - -This enables the DAWR even on POWER9. - -This is a dangerous setting, USE AT YOUR OWN RISK. - -Some users may not care about a bad user crashing their box -(ie. single user/desktop systems) and really want the DAWR. This -allows them to force enable DAWR. - -This flag can also be used to disable DAWR access. Once this is -cleared, all DAWR access should be cleared immediately and your -machine once again safe from crashing. - -Userspace may get confused by toggling this. If DAWR is force -enabled/disabled between getting the number of breakpoints (via -PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an -inconsistent view of what's available. Similarly for guests. - -For the DAWR to be enabled in a KVM guest, the DAWR needs to be force -enabled in the host AND the guest. For this reason, this won't work on -POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the -dawr_enable_dangerous file will fail if the hypervisor doesn't support -writing the DAWR. - -To double check the DAWR is working, run this kernel selftest: - - tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c - -Any errors/failures/skips mean something is wrong. diff --git a/Documentation/powerpc/dexcr.rst b/Documentation/powerpc/dexcr.rst deleted file mode 100644 index 615a631f51..0000000000 --- a/Documentation/powerpc/dexcr.rst +++ /dev/null @@ -1,58 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -========================================== -DEXCR (Dynamic Execution Control Register) -========================================== - -Overview -======== - -The DEXCR is a privileged special purpose register (SPR) introduced in -PowerPC ISA 3.1B (Power10) that allows per-cpu control over several dynamic -execution behaviours. 
These behaviours include speculation (e.g., indirect -branch target prediction) and enabling return-oriented programming (ROP) -protection instructions. - -The execution control is exposed in hardware as up to 32 bits ('aspects') in -the DEXCR. Each aspect controls a certain behaviour, and can be set or cleared -to enable/disable the aspect. There are several variants of the DEXCR for -different purposes: - -DEXCR - A privileged SPR that can control aspects for userspace and kernel space -HDEXCR - A hypervisor-privileged SPR that can control aspects for the hypervisor and - enforce aspects for the kernel and userspace. -UDEXCR - An optional ultravisor-privileged SPR that can control aspects for the ultravisor. - -Userspace can examine the current DEXCR state using a dedicated SPR that -provides a non-privileged read-only view of the userspace DEXCR aspects. -There is also an SPR that provides a read-only view of the hypervisor enforced -aspects, which ORed with the userspace DEXCR view gives the effective DEXCR -state for a process. - - -Configuration -============= - -The DEXCR is currently unconfigurable. All threads are run with the -NPHIE aspect enabled. - - -coredump and ptrace -=================== - -The userspace values of the DEXCR and HDEXCR (in this order) are exposed under -``NT_PPC_DEXCR``. These are each 64 bits and readonly, and are intended to -assist with core dumps. The DEXCR may be made writable in future. The top 32 -bits of both registers (corresponding to the non-userspace bits) are masked off. - -If the kernel config ``CONFIG_CHECKPOINT_RESTORE`` is enabled, then -``NT_PPC_HASHKEYR`` is available and exposes the HASHKEYR value of the process -for reading and writing. This is a tradeoff between increased security and -checkpoint/restore support: a process should normally have no need to know its -secret key, but restoring a process requires setting its original key. 
The key -therefore appears in core dumps, and an attacker may be able to retrieve it from -a coredump and effectively bypass ROP protection on any threads that share this -key (potentially all threads from the same parent that have not run ``exec()``). diff --git a/Documentation/powerpc/dscr.rst b/Documentation/powerpc/dscr.rst deleted file mode 100644 index f735ec5375..0000000000 --- a/Documentation/powerpc/dscr.rst +++ /dev/null @@ -1,87 +0,0 @@ -=================================== -DSCR (Data Stream Control Register) -=================================== - -DSCR register in powerpc allows user to have some control of prefetch of data -stream in the processor. Please refer to the ISA documents or related manual -for more detailed information regarding how to use this DSCR to attain this -control of the prefetches . This document here provides an overview of kernel -support for DSCR, related kernel objects, its functionalities and exported -user interface. - -(A) Data Structures: - - (1) thread_struct:: - - dscr /* Thread DSCR value */ - dscr_inherit /* Thread has changed default DSCR */ - - (2) PACA:: - - dscr_default /* per-CPU DSCR default value */ - - (3) sysfs.c:: - - dscr_default /* System DSCR default value */ - -(B) Scheduler Changes: - - Scheduler will write the per-CPU DSCR default which is stored in the - CPU's PACA value into the register if the thread has dscr_inherit value - cleared which means that it has not changed the default DSCR till now. - If the dscr_inherit value is set which means that it has changed the - default DSCR value, scheduler will write the changed value which will - now be contained in thread struct's dscr into the register instead of - the per-CPU default PACA based DSCR value. - - NOTE: Please note here that the system wide global DSCR value never - gets used directly in the scheduler process context switch at all. 
- -(C) SYSFS Interface: - - - Global DSCR default: /sys/devices/system/cpu/dscr_default - - CPU specific DSCR default: /sys/devices/system/cpu/cpuN/dscr - - Changing the global DSCR default in the sysfs will change all the CPU - specific DSCR defaults immediately in their PACA structures. Again if - the current process has the dscr_inherit clear, it also writes the new - value into every CPU's DSCR register right away and updates the current - thread's DSCR value as well. - - Changing the CPU specific DSCR default value in the sysfs does exactly - the same thing as above but unlike the global one above, it just changes - stuff for that particular CPU instead of for all the CPUs on the system. - -(D) User Space Instructions: - - The DSCR register can be accessed in the user space using any of these - two SPR numbers available for that purpose. - - (1) Problem state SPR: 0x03 (Un-privileged, POWER8 only) - (2) Privileged state SPR: 0x11 (Privileged) - - Accessing DSCR through privileged SPR number (0x11) from user space - works, as it is emulated following an illegal instruction exception - inside the kernel. Both mfspr and mtspr instructions are emulated. - - Accessing DSCR through user level SPR (0x03) from user space will first - create a facility unavailable exception. Inside this exception handler - all mfspr instruction based read attempts will get emulated and returned - whereas the first mtspr instruction based write attempts will enable - the DSCR facility for the next time around (both for read and write) by - setting DSCR facility in the FSCR register. - -(E) Specifics about 'dscr_inherit': - - The thread struct element 'dscr_inherit' represents whether the thread - in question has attempted and changed the DSCR itself using any of the - following methods. This element signifies whether the thread wants to - use the CPU default DSCR value or its own changed DSCR value in the - kernel.
- - (1) mtspr instruction (SPR number 0x03) - (2) mtspr instruction (SPR number 0x11) - (3) ptrace interface (Explicitly set user DSCR value) - - Any child of the process created after this event in the process inherits - this same behaviour as well. diff --git a/Documentation/powerpc/eeh-pci-error-recovery.rst b/Documentation/powerpc/eeh-pci-error-recovery.rst deleted file mode 100644 index d6643a91bd..0000000000 --- a/Documentation/powerpc/eeh-pci-error-recovery.rst +++ /dev/null @@ -1,336 +0,0 @@ -========================== -PCI Bus EEH Error Recovery -========================== - -Linas Vepstas - -12 January 2005 - - -Overview: ---------- -The IBM POWER-based pSeries and iSeries computers include PCI bus -controller chips that have extended capabilities for detecting and -reporting a large variety of PCI bus error conditions. These features -go under the name of "EEH", for "Enhanced Error Handling". The EEH -hardware features allow PCI bus errors to be cleared and a PCI -card to be "rebooted", without also having to reboot the operating -system. - -This is in contrast to traditional PCI error handling, where the -PCI chip is wired directly to the CPU, and an error would cause -a CPU machine-check/check-stop condition, halting the CPU entirely. -Another "traditional" technique is to ignore such errors, which -can lead to data corruption, both of user data or of kernel data, -hung/unresponsive adapters, or system crashes/lockups. Thus, -the idea behind EEH is that the operating system can become more -reliable and robust by protecting it from PCI errors, and giving -the OS the ability to "reboot"/recover individual PCI devices. - -Future systems from other vendors, based on the PCI-E specification, -may contain similar features. - - -Causes of EEH Errors --------------------- -EEH was originally designed to guard against hardware failure, such -as PCI cards dying from heat, humidity, dust, vibration and bad -electrical connections. 
The vast majority of EEH errors seen in -"real life" are due to either poorly seated PCI cards, or, -unfortunately quite commonly, due to device driver bugs, device firmware -bugs, and sometimes PCI card hardware bugs. - -The most common software bug is one that causes the device to -attempt to DMA to a location in system memory that has not been -reserved for DMA access for that card. This is a powerful feature, -as it prevents what would otherwise have been silent memory -corruption caused by the bad DMA. A number of device driver -bugs have been found and fixed in this way over the past few -years. Other possible causes of EEH errors include data or -address line parity errors (for example, due to poor electrical -connectivity due to a poorly seated card), and PCI-X split-completion -errors (due to software, device firmware, or device PCI hardware bugs). -The vast majority of "true hardware failures" can be cured by -physically removing and re-seating the PCI card. - - -Detection and Recovery ----------------------- -In the following discussion, a generic overview of how to detect -and recover from EEH errors will be presented. This is followed -by an overview of how the current implementation in the Linux -kernel does it. The actual implementation is subject to change, -and some of the finer points are still being debated. These -may in turn be swayed if or when other architectures implement -similar functionality. - -When a PCI Host Bridge (PHB, the bus controller connecting the -PCI bus to the system CPU electronics complex) detects a PCI error -condition, it will "isolate" the affected PCI card. Isolation -will block all writes (either to the card from the system, or -from the card to the system), and it will cause all reads to -return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads). -This value was chosen because it is the same value you would -get if the device was physically unplugged from the slot.
-This includes access to PCI memory, I/O space, and PCI config -space. Interrupts, however, will continue to be delivered. - -Detection and recovery are performed with the aid of ppc64 -firmware. The programming interfaces in the Linux kernel -into the firmware are referred to as RTAS (Run-Time Abstraction -Services). The Linux kernel does not (should not) access -the EEH function in the PCI chipsets directly, primarily because -there are a number of different chipsets out there, each with -different interfaces and quirks. The firmware provides a -uniform abstraction layer that will work with all pSeries -and iSeries hardware (and be forwards-compatible). - -If the OS or device driver suspects that a PCI slot has been -EEH-isolated, there is a firmware call it can make to determine if -this is the case. If so, then the device driver should put itself -into a consistent state (given that it won't be able to complete any -pending work) and start recovery of the card. Recovery normally -would consist of resetting the PCI device (holding the PCI #RST -line high for two seconds), followed by setting up the device -config space (the base address registers (BAR's), latency timer, -cache line size, interrupt line, and so on). This is followed by a -reinitialization of the device driver. In a worst-case scenario, -the power to the card can be toggled, at least on hot-plug-capable -slots. In principle, layers far above the device driver probably -do not need to know that the PCI card has been "rebooted" in this -way; ideally, there should be at most a pause in Ethernet/disk/USB -I/O while the card is being reset. - -If the card cannot be recovered after three or four resets, the -kernel/device driver should assume the worst-case scenario, that the -card has died completely, and report this error to the sysadmin. -In addition, error messages are reported through RTAS and also through -syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
-The correct way to deal with failed adapters is to use the standard -PCI hotplug tools to remove and replace the dead card. - - -Current PPC64 Linux EEH Implementation --------------------------------------- -At this time, a generic EEH recovery mechanism has been implemented, -so that individual device drivers do not need to be modified to support -EEH recovery. This generic mechanism piggy-backs on the PCI hotplug -infrastructure, and percolates events up through the userspace/udev -infrastructure. Following is a detailed description of how this is -accomplished. - -EEH must be enabled in the PHBs very early during the boot process, -and if a PCI slot is hot-plugged. The former is performed by -eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the latter by -drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code. -EEH must be enabled before a PCI scan of the device can proceed. -Current Power5 hardware will not work unless EEH is enabled, -although older Power4 can run with it disabled. Effectively, -EEH can no longer be turned off. PCI devices *must* be -registered with the EEH code; the EEH code needs to know about -the I/O address ranges of the PCI device in order to detect an -error. Given an arbitrary address, the routine -pci_get_device_by_addr() will find the pci device associated -with that address (if any). - -The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(), -etc. include a check to see if the i/o read returned all-0xff's. -If so, these make a call to eeh_dn_check_failure(), which in turn -asks the firmware if the all-ff's value is the sign of a true EEH -error. If it is not, processing continues as normal. The grand -total number of these false alarms or "false positives" can be -seen in /proc/ppc64/eeh (subject to change). Normally, almost -all of these occur during boot, when the PCI bus is scanned, where -a large number of 0xff reads are part of the bus scan procedure.
- -If a frozen slot is detected, code in -arch/powerpc/platforms/pseries/eeh.c will print a stack trace to -syslog (/var/log/messages). This stack trace has proven to be very -useful to device-driver authors for finding out at what point the EEH -error was detected, as the error itself usually occurs slightly -beforehand. - -Next, it uses the Linux kernel notifier chain/work queue mechanism to -allow any interested parties to find out about the failure. Device -drivers, or other parts of the kernel, can use -`eeh_register_notifier(struct notifier_block *)` to find out about EEH -events. The event will include a pointer to the pci device, the -device node and some state info. Receivers of the event can "do as -they wish"; the default handler will be described further in this -section. - -To assist in the recovery of the device, eeh.c exports the -following functions: - -rtas_set_slot_reset() - assert the PCI #RST line for 1/8th of a second -rtas_configure_bridge() - ask firmware to configure any PCI bridges - located topologically under the pci slot. -eeh_save_bars() and eeh_restore_bars(): - save and restore the PCI - config-space info for a device and any devices under it. - - -A handler for the EEH notifier_block events is implemented in -drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events(). -It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter(). -This last call causes the device driver for the card to be stopped, -which causes uevents to go out to user space. This triggers -user-space scripts that might issue commands such as "ifdown eth0" -for ethernet cards, and so on. This handler then sleeps for 5 seconds, -hoping to give the user-space scripts enough time to complete. -It then resets the PCI card, reconfigures the device BAR's, and -any bridges underneath. It then calls rpaphp_enable_pci_slot(), -which restarts the device driver and triggers more user-space -events (for example, calling "ifup eth0" for ethernet cards). 
- - -Device Shutdown and User-Space Events -------------------------------------- -This section documents what happens when a pci slot is unconfigured, -focusing on how the device driver gets shut down, and on how the -events get delivered to user-space scripts. - -Following is an example sequence of events that cause a device driver -close function to be called during the first phase of an EEH reset. -The following sequence is an example of the pcnet32 device driver:: - - rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c - { - calls - pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c - { - calls - pci_destroy_dev (struct pci_dev *) - { - calls - device_unregister (&dev->dev) // in /drivers/base/core.c - { - calls - device_del (struct device *) - { - calls - bus_remove_device() // in /drivers/base/bus.c - { - calls - device_release_driver() - { - calls - struct device_driver->remove() which is just - pci_device_remove() // in /drivers/pci/pci_driver.c - { - calls - struct pci_driver->remove() which is just - pcnet32_remove_one() // in /drivers/net/pcnet32.c - { - calls - unregister_netdev() // in /net/core/dev.c - { - calls - dev_close() // in /net/core/dev.c - { - calls dev->stop(); - which is just pcnet32_close() // in pcnet32.c - { - which does what you wanted - to stop the device - } - } - } - which - frees pcnet32 device driver memory - } - }}}}}} - - -in drivers/pci/pci_driver.c, -struct device_driver->remove() is just pci_device_remove() -which calls struct pci_driver->remove() which is pcnet32_remove_one() -which calls unregister_netdev() (in net/core/dev.c) -which calls dev_close() (in net/core/dev.c) -which calls dev->stop() which is pcnet32_close() -which then does the appropriate shutdown. 
- ---- - -Following is the analogous stack trace for events sent to user-space -when the pci device is unconfigured:: - - rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c - calls - pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c - calls - pci_destroy_dev (struct pci_dev *) { - calls - device_unregister (&dev->dev) { // in /drivers/base/core.c - calls - device_del(struct device * dev) { // in /drivers/base/core.c - calls - kobject_del() { //in /libs/kobject.c - calls - kobject_uevent() { // in /libs/kobject.c - calls - kset_uevent() { // in /lib/kobject.c - calls - kset->uevent_ops->uevent() // which is really just - a call to - dev_uevent() { // in /drivers/base/core.c - calls - dev->bus->uevent() which is really just a call to - pci_uevent () { // in drivers/pci/hotplug.c - which prints device name, etc.... - } - } - then kobject_uevent() sends a netlink uevent to userspace - --> userspace uevent - (during early boot, nobody listens to netlink events and - kobject_uevent() executes uevent_helper[], which runs the - event process /sbin/hotplug) - } - } - kobject_del() then calls sysfs_remove_dir(), which would - trigger any user-space daemon that was watching /sysfs, - and notice the delete event. - - -Pro's and Con's of the Current Design -------------------------------------- -There are several issues with the current EEH software recovery design, -which may be addressed in future revisions. But first, note that the -big plus of the current design is that no changes need to be made to -individual device drivers, so that the current design throws a wide net. -The biggest negative of the design is that it potentially disturbs -network daemons and file systems that didn't need to be disturbed. - -- A minor complaint is that resetting the network card causes - user-space back-to-back ifdown/ifup burps that potentially disturb - network daemons, that didn't need to even know that the pci - card was being rebooted. 
- -- A more serious concern is that the same reset, for SCSI devices, - causes havoc to mounted file systems. Scripts cannot post-facto - unmount a file system without flushing pending buffers, but this - is impossible, because I/O has already been stopped. Thus, - ideally, the reset should happen at or below the block layer, - so that the file systems are not disturbed. - - Reiserfs does not tolerate errors returned from the block device. - Ext3fs seems to be tolerant, retrying reads/writes until it does - succeed. Both have been only lightly tested in this scenario. - - The SCSI-generic subsystem already has built-in code for performing - SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter - (HBA) resets. These are cascaded into a chain of attempted - resets if a SCSI command fails. These are completely hidden - from the block layer. It would be very natural to add an EEH - reset into this chain of events. - -- If a SCSI error occurs for the root device, all is lost unless - the sysadmin had the foresight to run /bin, /sbin, /etc, /var - and so on, out of ramdisk/tmpfs. - - -Conclusions ------------ -There's forward progress ... diff --git a/Documentation/powerpc/elf_hwcaps.rst b/Documentation/powerpc/elf_hwcaps.rst deleted file mode 100644 index 3366e5b18e..0000000000 --- a/Documentation/powerpc/elf_hwcaps.rst +++ /dev/null @@ -1,231 +0,0 @@ -.. _elf_hwcaps_powerpc: - -================== -POWERPC ELF HWCAPs -================== - -This document describes the usage and semantics of the powerpc ELF HWCAPs. - - -1. Introduction ---------------- - -Some hardware or software features are only available on some CPU -implementations, and/or with certain kernel configurations, but have no other -discovery mechanism available to userspace code. The kernel exposes the -presence of these features to userspace through a set of flags called HWCAPs, -exposed in the auxiliary vector. 
- -Userspace software can test for features by acquiring the AT_HWCAP or -AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant -flags are set, e.g.:: - - bool floating_point_is_present(void) - { - unsigned long HWCAPs = getauxval(AT_HWCAP); - if (HWCAPs & PPC_FEATURE_HAS_FPU) - return true; - - return false; - } - -Where software relies on a feature described by a HWCAP, it should check the -relevant HWCAP flag to verify that the feature is present before attempting to -make use of the feature. - -HWCAP is the preferred method to test for the presence of a feature rather -than probing through other means, which may not be reliable or may cause -unpredictable behaviour. - -Software that targets a particular platform does not necessarily have to -test for required or implied features. For example if the program requires -FPU, VMX, VSX, it is not necessary to test those HWCAPs, and it may be -impossible to do so if the compiler generates code requiring those features. - -2. Facilities -------------- - -The Power ISA uses the term "facility" to describe a class of instructions, -registers, interrupts, etc. The presence or absence of a facility indicates -whether this class is available to be used, but the specifics depend on the -ISA version. For example, if the VSX facility is available, the VSX -instructions that can be used differ between the v3.0B and v3.1B ISA -versions. - -3. Categories -------------- - -The Power ISA before v3.0 uses the term "category" to describe certain -classes of instructions and operating modes which may be optional or -mutually exclusive, the exact meaning of the HWCAP flag may depend on -context, e.g., the presence of the BOOKE feature implies that the server -category is not implemented. - -4. HWCAP allocation -------------------- - -HWCAPs are allocated as described in Power Architecture 64-Bit ELF V2 ABI -Specification (which will be reflected in the kernel's uapi headers). - -5. 
The HWCAPs exposed in AT_HWCAP ---------------------------------- - -PPC_FEATURE_32 - 32-bit CPU - -PPC_FEATURE_64 - 64-bit CPU (userspace may be running in 32-bit mode). - -PPC_FEATURE_601_INSTR - The processor is PowerPC 601. - Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") - -PPC_FEATURE_HAS_ALTIVEC - Vector (aka Altivec, VMX) facility is available. - -PPC_FEATURE_HAS_FPU - Floating point facility is available. - -PPC_FEATURE_HAS_MMU - Memory management unit is present and enabled. - -PPC_FEATURE_HAS_4xxMAC - The processor is 40x or 44x family. - -PPC_FEATURE_UNIFIED_CACHE - The processor has a unified L1 cache for instructions and data, as - found in NXP e200. - Unused in the kernel since 39c8bf2b3cc1 ("powerpc: Retire e200 core (mpc555x processor)") - -PPC_FEATURE_HAS_SPE - Signal Processing Engine facility is available. - -PPC_FEATURE_HAS_EFP_SINGLE - Embedded Floating Point single precision operations are available. - -PPC_FEATURE_HAS_EFP_DOUBLE - Embedded Floating Point double precision operations are available. - -PPC_FEATURE_NO_TB - The timebase facility (mftb instruction) is not available. - This is a 601 specific HWCAP, so if it is known that the processor - running is not a 601, via other HWCAPs or other means, it is not - required to test this bit before using the timebase. - Unused in the kernel since f0ed73f3fa2c ("powerpc: Remove PowerPC 601") - -PPC_FEATURE_POWER4 - The processor is POWER4 or PPC970/FX/MP. - POWER4 support dropped from the kernel since 471d7ff8b51b ("powerpc/64s: Remove POWER4 support") - -PPC_FEATURE_POWER5 - The processor is POWER5. - -PPC_FEATURE_POWER5_PLUS - The processor is POWER5+. - -PPC_FEATURE_CELL - The processor is Cell. - -PPC_FEATURE_BOOKE - The processor implements the embedded category ("BookE") architecture. - -PPC_FEATURE_SMT - The processor implements SMT. 
- -PPC_FEATURE_ICACHE_SNOOP - The processor icache is coherent with the dcache, and instruction storage - can be made consistent with data storage for the purpose of executing - instructions with the sequence (as described in, e.g., POWER9 Processor - User's Manual, 4.6.2.2 Instruction Cache Block Invalidate (icbi)):: - - sync - icbi (to any address) - isync - -PPC_FEATURE_ARCH_2_05 - The processor supports the v2.05 userlevel architecture. Processors - supporting later architectures DO NOT set this feature. - -PPC_FEATURE_PA6T - The processor is PA6T. - -PPC_FEATURE_HAS_DFP - DFP facility is available. - -PPC_FEATURE_POWER6_EXT - The processor is POWER6. - -PPC_FEATURE_ARCH_2_06 - The processor supports the v2.06 userlevel architecture. Processors - supporting later architectures also set this feature. - -PPC_FEATURE_HAS_VSX - VSX facility is available. - -PPC_FEATURE_PSERIES_PERFMON_COMPAT - The processor supports architected PMU events in the range 0xE0-0xFF. - -PPC_FEATURE_TRUE_LE - The processor supports true little-endian mode. - -PPC_FEATURE_PPC_LE - The processor supports "PowerPC Little-Endian", that uses address - munging to make storage access appear to be little-endian, but the - data is stored in a different format that is unsuitable to be - accessed by other agents not running in this mode. - -6. The HWCAPs exposed in AT_HWCAP2 ----------------------------------- - -PPC_FEATURE2_ARCH_2_07 - The processor supports the v2.07 userlevel architecture. Processors - supporting later architectures also set this feature. - -PPC_FEATURE2_HTM - Transactional Memory feature is available. - -PPC_FEATURE2_DSCR - DSCR facility is available. - -PPC_FEATURE2_EBB - EBB facility is available. - -PPC_FEATURE2_ISEL - isel instruction is available. This is superseded by ARCH_2_07 and - later. - -PPC_FEATURE2_TAR - TAR facility is available. - -PPC_FEATURE2_VEC_CRYPTO - v2.07 crypto instructions are available. 
- -PPC_FEATURE2_HTM_NOSC - System calls fail if called in a transactional state, see - Documentation/powerpc/syscall64-abi.rst - -PPC_FEATURE2_ARCH_3_00 - The processor supports the v3.0B / v3.0C userlevel architecture. Processors - supporting later architectures also set this feature. - -PPC_FEATURE2_HAS_IEEE128 - IEEE 128-bit binary floating point is supported with VSX - quad-precision instructions and data types. - -PPC_FEATURE2_DARN - darn instruction is available. - -PPC_FEATURE2_SCV - The scv 0 instruction may be used for system calls, see - Documentation/powerpc/syscall64-abi.rst. - -PPC_FEATURE2_HTM_NO_SUSPEND - A limited Transactional Memory facility that does not support suspend is - available, see Documentation/powerpc/transactional_memory.rst. - -PPC_FEATURE2_ARCH_3_1 - The processor supports the v3.1 userlevel architecture. Processors - supporting later architectures also set this feature. - -PPC_FEATURE2_MMA - MMA facility is available. diff --git a/Documentation/powerpc/elfnote.rst b/Documentation/powerpc/elfnote.rst deleted file mode 100644 index 3ec8d61e9a..0000000000 --- a/Documentation/powerpc/elfnote.rst +++ /dev/null @@ -1,41 +0,0 @@ -========================== -ELF Note PowerPC Namespace -========================== - -The PowerPC namespace in an ELF Note of the kernel binary is used to store -capabilities and information which can be used by a bootloader or userland. - -Types and Descriptors ---------------------- - -The types to be used with the "PowerPC" namespace are defined in [#f1]_. - - 1) PPC_ELFNOTE_CAPABILITIES - -Define the capabilities supported/required by the kernel. This type uses a -bitmap as "descriptor" field. Each bit is described below: - -- Ultravisor-capable bit (PowerNV only). - -.. code-block:: c - - #define PPCCAP_ULTRAVISOR_BIT (1 << 0) - -Indicate that the powerpc kernel binary knows how to run in an -ultravisor-enabled system. 
- -In an ultravisor-enabled system, some machine resources are now controlled -by the ultravisor. If the kernel is not ultravisor-capable, but it ends up -being run on a machine with ultravisor, the kernel will probably crash -trying to access ultravisor resources. For instance, it may crash in early -boot trying to set the partition table entry 0. - -In an ultravisor-enabled system, a bootloader could warn the user or prevent -the kernel from being run if the PowerPC ultravisor capability doesn't exist -or the Ultravisor-capable bit is not set. - -References ----------- - -.. [#f1] arch/powerpc/include/asm/elfnote.h - diff --git a/Documentation/powerpc/features.rst b/Documentation/powerpc/features.rst deleted file mode 100644 index ee4b95e042..0000000000 --- a/Documentation/powerpc/features.rst +++ /dev/null @@ -1,3 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. kernel-feat:: features powerpc diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst deleted file mode 100644 index e363fc4852..0000000000 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ /dev/null @@ -1,381 +0,0 @@ -====================== -Firmware-Assisted Dump -====================== - -July 2011 - -The goal of firmware-assisted dump is to enable the dump of -a crashed system, and to do so from a fully-reset system, and -to minimize the total elapsed time until the system is back -in production use. - -- Firmware-Assisted Dump (FADump) infrastructure is intended to replace - the existing phyp assisted dump. -- Fadump uses the same firmware interfaces and memory reservation model - as phyp assisted dump. -- Unlike phyp dump, FADump exports the memory dump through /proc/vmcore - in the ELF format in the same way as kdump. This helps us reuse the - kdump infrastructure for dump capture and filtering. -- Unlike phyp dump, userspace tool does not need to refer any sysfs - interface while reading /proc/vmcore. 
-- Unlike phyp dump, FADump allows user to release all the memory reserved - for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem. -- Once enabled through kernel boot parameter, FADump can be - started/stopped through /sys/kernel/fadump_registered interface (see - sysfs files section below) and can be easily integrated with kdump - service start/stop init scripts. - -Comparing with kdump or other strategies, firmware-assisted -dump offers several strong, practical advantages: - -- Unlike kdump, the system has been reset, and loaded - with a fresh copy of the kernel. In particular, - PCI and I/O devices have been reinitialized and are - in a clean, consistent state. -- Once the dump is copied out, the memory that held the dump - is immediately available to the running kernel. And therefore, - unlike kdump, FADump doesn't need a 2nd reboot to get back - the system to the production configuration. - -The above can only be accomplished by coordination with, -and assistance from the Power firmware. The procedure is -as follows: - -- The first kernel registers the sections of memory with the - Power firmware for dump preservation during OS initialization. - These registered sections of memory are reserved by the first - kernel during early boot. - -- When system crashes, the Power firmware will copy the registered - low memory regions (boot memory) from source to destination area. - It will also save hardware PTE's. - - NOTE: - The term 'boot memory' means size of the low memory chunk - that is required for a kernel to boot successfully when - booted with restricted memory. By default, the boot memory - size will be the larger of 5% of system RAM or 256MB. - Alternatively, user can also specify boot memory size - through boot parameter 'crashkernel=' which will override - the default calculated size. Use this option if default - boot memory size is not sufficient for second kernel to - boot successfully. 
For syntax of crashkernel= parameter, - refer to Documentation/admin-guide/kdump/kdump.rst. If any - offset is provided in crashkernel= parameter, it will be - ignored as FADump uses a predefined offset to reserve memory - for boot memory dump preservation in case of a crash. - -- After the low memory (boot memory) area has been saved, the - firmware will reset PCI and other hardware state. It will - *not* clear the RAM. It will then launch the bootloader, as - normal. - -- The freshly booted kernel will notice that there is a new node - (rtas/ibm,kernel-dump on pSeries or ibm,opal/dump/mpipl-boot - on OPAL platform) in the device tree, indicating that - there is crash data available from a previous boot. During - the early boot OS will reserve rest of the memory above - boot memory size effectively booting with restricted memory - size. This will make sure that this kernel (also, referred - to as second kernel or capture kernel) will not touch any - of the dump memory area. - -- User-space tools will read /proc/vmcore to obtain the contents - of memory, which holds the previous crashed kernel dump in ELF - format. The userspace tools may copy this info to disk, or - network, nas, san, iscsi, etc. as desired. - -- Once the userspace tool is done saving dump, it will echo - '1' to /sys/kernel/fadump_release_mem to release the reserved - memory back to general use, except the memory required for - next firmware-assisted dump registration. - - e.g.:: - - # echo 1 > /sys/kernel/fadump_release_mem - -Please note that the firmware-assisted dump feature -is only available on POWER6 and above systems on pSeries -(PowerVM) platform and POWER9 and above systems with OP940 -or later firmware versions on PowerNV (OPAL) platform. -Note that, OPAL firmware exports ibm,opal/dump node when -FADump is supported on PowerNV platform. - -On OPAL based machines, system first boots into an intermittent -kernel (referred to as petitboot kernel) before booting into the -capture kernel. 
This kernel would have minimal kernel and/or -userspace support to process crash data. Such kernel needs to -preserve previously crash'ed kernel's memory for the subsequent -capture kernel boot to process this crash data. Kernel config -option CONFIG_PRESERVE_FA_DUMP has to be enabled on such kernel -to ensure that crash data is preserved to process later. - --- On OPAL based machines (PowerNV), if the kernel is build with - CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also - exported as /sys/firmware/opal/mpipl/core file. This procfs file is - helpful in debugging OPAL crashes with GDB. The kernel memory - used for exporting this procfs file can be released by echo'ing - '1' to /sys/firmware/opal/mpipl/release_core node. - - e.g. - # echo 1 > /sys/firmware/opal/mpipl/release_core - -Implementation details: ------------------------ - -During boot, a check is made to see if firmware supports -this feature on that particular machine. If it does, then -we check to see if an active dump is waiting for us. If yes -then everything but boot memory size of RAM is reserved during -early boot (See Fig. 2). This area is released once we finish -collecting the dump from user land scripts (e.g. kdump scripts) -that are run. If there is dump data, then the -/sys/kernel/fadump_release_mem file is created, and the reserved -memory is held. - -If there is no waiting dump data, then only the memory required to -hold CPU state, HPTE region, boot memory dump, FADump header and -elfcore header, is usually reserved at an offset greater than boot -memory size (see Fig. 1). This area is *not* released: this region -will be kept permanently reserved, so that it can act as a receptacle -for a copy of the boot memory content in addition to CPU state and -HPTE region, in the case a crash does occur. - -Since this reserved memory area is used only after the system crash, -there is no point in blocking this significant chunk of memory from -production kernel. 
Hence, the implementation uses the Linux kernel's -Contiguous Memory Allocator (CMA) for memory reservation if CMA is -configured for kernel. With CMA reservation this memory will be -available for applications to use it, while kernel is prevented from -using it. With this FADump will still be able to capture all of the -kernel memory and most of the user space memory except the user pages -that were present in CMA region:: - - o Memory Reservation during first kernel - - Low memory Top of memory - 0 boot memory size |<--- Reserved dump area --->| | - | | | Permanent Reservation | | - V V | | V - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | |///|////| DUMP | HDR | ELF |////| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | ^ ^ ^ ^ ^ - | | | | | | - \ CPU HPTE / | | - ------------------------------ | | - Boot memory content gets transferred | | - to reserved area by firmware at the | | - time of crash. | | - FADump Header | - (meta area) | - | - | - Metadata: This area holds a metadata structure whose - address is registered with f/w and retrieved in the - second kernel after crash, on platforms that support - tags (OPAL). Having such structure with info needed - to process the crashdump eases dump capture process. - - Fig. 1 - - - o Memory Reservation during second kernel after crash - - Low memory Top of memory - 0 boot memory size | - | |<------------ Crash preserved area ------------>| - V V |<--- Reserved dump area --->| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | |///|////| DUMP | HDR | ELF |////| | - +-----------+-----/ /---+---+----+-------+-----+-----+----+--+ - | | - V V - Used by second /proc/vmcore - kernel to boot - - +---+ - |///| -> Regions (CPU, HPTE & Metadata) marked like this in the above - +---+ figures are not always present. For example, OPAL platform - does not have CPU & HPTE regions while Metadata region is - not supported on pSeries currently. - - Fig. 
2 - - -Currently the dump will be copied from /proc/vmcore to a new file upon -user intervention. The dump data available through /proc/vmcore will be -in ELF format. Hence the existing kdump infrastructure (kdump scripts) -to save the dump works fine with minor modifications. KDump scripts on -major Distro releases have already been modified to work seamlessly (no -user intervention in saving the dump) when FADump is used, instead of -KDump, as dump mechanism. - -The tools to examine the dump will be same as the ones -used for kdump. - -How to enable firmware-assisted dump (FADump): ----------------------------------------------- - -1. Set config option CONFIG_FA_DUMP=y and build kernel. -2. Boot into linux kernel with 'fadump=on' kernel cmdline option. - By default, FADump reserved memory will be initialized as CMA area. - Alternatively, user can boot linux kernel with 'fadump=nocma' to - prevent FADump to use CMA. -3. Optionally, user can also set 'crashkernel=' kernel cmdline - to specify size of the memory to reserve for boot memory dump - preservation. - -NOTE: - 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead - use 'crashkernel=' to specify size of the memory to reserve - for boot memory dump preservation. - 2. If firmware-assisted dump fails to reserve memory then it - will fallback to existing kdump mechanism if 'crashkernel=' - option is set at kernel cmdline. - 3. if user wants to capture all of user space memory and ok with - reserved memory not available to production system, then - 'fadump=nocma' kernel parameter can be used to fallback to - old behaviour. - -Sysfs/debugfs files: --------------------- - -Firmware-assisted dump feature uses sysfs file system to hold -the control files and debugfs file to display memory reserved region. - -Here is the list of files under kernel sysfs: - - /sys/kernel/fadump_enabled - This is used to display the FADump status. 
- - - 0 = FADump is disabled - - 1 = FADump is enabled - - This interface can be used by kdump init scripts to identify if - FADump is enabled in the kernel and act accordingly. - - /sys/kernel/fadump_registered - This is used to display the FADump registration status as well - as to control (start/stop) the FADump registration. - - - 0 = FADump is not registered. - - 1 = FADump is registered and ready to handle system crash. - - To register FADump echo 1 > /sys/kernel/fadump_registered and - echo 0 > /sys/kernel/fadump_registered for un-register and stop the - FADump. Once the FADump is un-registered, the system crash will not - be handled and vmcore will not be captured. This interface can be - easily integrated with kdump service start/stop. - - /sys/kernel/fadump/mem_reserved - - This is used to display the memory reserved by FADump for saving the - crash dump. - - /sys/kernel/fadump_release_mem - This file is available only when FADump is active during - second kernel. This is used to release the reserved memory - region that are held for saving crash dump. To release the - reserved memory echo 1 to it:: - - echo 1 > /sys/kernel/fadump_release_mem - - After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region - file will change to reflect the new memory reservations. - - The existing userspace tools (kdump infrastructure) can be easily - enhanced to use this interface to release the memory reserved for - dump and continue without 2nd reboot. - -Note: /sys/kernel/fadump_release_opalcore sysfs has moved to - /sys/firmware/opal/mpipl/release_core - - /sys/firmware/opal/mpipl/release_core - - This file is available only on OPAL based machines when FADump is - active during capture kernel. This is used to release the memory - used by the kernel to export /sys/firmware/opal/mpipl/core file. To - release this memory, echo '1' to it: - - echo 1 > /sys/firmware/opal/mpipl/release_core - -Note: The following FADump sysfs files are deprecated. 
- -+----------------------------------+--------------------------------+ -| Deprecated | Alternative | -+----------------------------------+--------------------------------+ -| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled | -+----------------------------------+--------------------------------+ -| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered | -+----------------------------------+--------------------------------+ -| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem | -+----------------------------------+--------------------------------+ - -Here is the list of files under powerpc debugfs: -(Assuming debugfs is mounted on /sys/kernel/debug directory.) - - /sys/kernel/debug/powerpc/fadump_region - This file shows the reserved memory regions if FADump is - enabled otherwise this file is empty. The output format - is:: - - : [-] bytes, Dumped: - - and for kernel DUMP region is: - - DUMP: Src: , Dest: , Size: , Dumped: # bytes - - e.g. - Contents when FADump is registered during first kernel:: - - # cat /sys/kernel/debug/powerpc/fadump_region - CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0 - HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0 - DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0 - - Contents when FADump is active during second kernel:: - - # cat /sys/kernel/debug/powerpc/fadump_region - CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020 - HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000 - DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000 - : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000 - - -NOTE: - Please refer to Documentation/filesystems/debugfs.rst on - how to mount the debugfs filesystem. 
- - -TODO: ------ - - Need to come up with the better approach to find out more - accurate boot memory size that is required for a kernel to - boot successfully when booted with restricted memory. - - The FADump implementation introduces a FADump crash info structure - in the scratch area before the ELF core header. The idea of introducing - this structure is to pass some important crash info data to the second - kernel which will help second kernel to populate ELF core header with - correct data before it gets exported through /proc/vmcore. The current - design implementation does not address a possibility of introducing - additional fields (in future) to this structure without affecting - compatibility. Need to come up with the better approach to address this. - - The possible approaches are: - - 1. Introduce version field for version tracking, bump up the version - whenever a new field is added to the structure in future. The version - field can be used to find out what fields are valid for the current - version of the structure. - 2. Reserve the area of predefined size (say PAGE_SIZE) for this - structure and have unused area as reserved (initialized to zero) - for future field additions. - - The advantage of approach 1 over 2 is we don't need to reserve extra space. - -Author: Mahesh Salgaonkar - -This document is based on the original documentation written for phyp - -assisted dump by Linas Vepstas and Manish Ahuja. diff --git a/Documentation/powerpc/hvcs.rst b/Documentation/powerpc/hvcs.rst deleted file mode 100644 index 6808acde67..0000000000 --- a/Documentation/powerpc/hvcs.rst +++ /dev/null @@ -1,581 +0,0 @@ -=============================================================== -HVCS IBM "Hypervisor Virtual Console Server" Installation Guide -=============================================================== - -for Linux Kernel 2.6.4+ - -Copyright (C) 2004 IBM Corporation - -.. =========================================================================== -.. 
NOTE:Eight space tabs are the optimum editor setting for reading this file. -.. =========================================================================== - - -Author(s): Ryan S. Arnold - -Date Created: March, 02, 2004 -Last Changed: August, 24, 2004 - -.. Table of contents: - - 1. Driver Introduction: - 2. System Requirements - 3. Build Options: - 3.1 Built-in: - 3.2 Module: - 4. Installation: - 5. Connection: - 6. Disconnection: - 7. Configuration: - 8. Questions & Answers: - 9. Reporting Bugs: - -1. Driver Introduction: -======================= - -This is the device driver for the IBM Hypervisor Virtual Console Server, -"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user -space applications access to the system consoles of logically partitioned -operating systems (Linux and AIX) running on the same partitioned Power5 -ppc64 system. Physical hardware consoles per partition are not practical -on this hardware so system consoles are accessed by this driver using -firmware interfaces to virtual terminal devices. - -2. System Requirements: -======================= - -This device driver was written using 2.6.4 Linux kernel APIs and will only -build and run on kernels of this version or later. - -This driver was written to operate solely on IBM Power5 ppc64 hardware -though some care was taken to abstract the architecture dependent firmware -calls from the driver code. - -Sysfs must be mounted on the system so that the user can determine which -major and minor numbers are associated with each vty-server. Directions -for sysfs mounting are outside the scope of this document. - -3. Build Options: -================= - -The hvcs driver registers itself as a tty driver. The tty layer -dynamically allocates a block of major and minor numbers in a quantity -requested by the registering driver. The hvcs driver asks the tty layer -for 64 of these major/minor numbers by default to use for hvcs device node -entries. 
- -If the default number of device entries is adequate then this driver can be -built into the kernel. If not, the default can be over-ridden by inserting -the driver as a module with insmod parameters. - -3.1 Built-in: -------------- - -The following menuconfig example demonstrates selecting to build this -driver into the kernel:: - - Device Drivers ---> - Character devices ---> - <*> IBM Hypervisor Virtual Console Server Support - -Begin the kernel make process. - -3.2 Module: ------------ - -The following menuconfig example demonstrates selecting to build this -driver as a kernel module:: - - Device Drivers ---> - Character devices ---> - IBM Hypervisor Virtual Console Server Support - -The make process will build the following kernel modules: - - - hvcs.ko - - hvcserver.ko - -To insert the module with the default allocation execute the following -commands in the order they appear:: - - insmod hvcserver.ko - insmod hvcs.ko - -The hvcserver module contains architecture specific firmware calls and must -be inserted first, otherwise the hvcs module will not find some of the -symbols it expects. - -To override the default use an insmod parameter as follows (requesting 4 -tty devices as an example):: - - insmod hvcs.ko hvcs_parm_num_devs=4 - -There is a maximum number of dev entries that can be specified on insmod. -We think that 1024 is currently a decent maximum number of server adapters -to allow. This can always be changed by modifying the constant in the -source file before building. - -NOTE: The length of time it takes to insmod the driver seems to be related -to the number of tty interfaces the registering driver requests. 
- -In order to remove the driver module execute the following command:: - - rmmod hvcs.ko - -The recommended method for installing hvcs as a module is to use depmod to -build a current modules.dep file in /lib/modules/`uname -r` and then -execute:: - - modprobe hvcs hvcs_parm_num_devs=4 - -The modules.dep file indicates that hvcserver.ko needs to be inserted -before hvcs.ko and modprobe uses this file to smartly insert the modules in -the proper order. - -The following modprobe command is used to remove hvcs and hvcserver in the -proper order:: - - modprobe -r hvcs - -4. Installation: -================ - -The tty layer creates sysfs entries which contain the major and minor -numbers allocated for the hvcs driver. The following snippet of "tree" -output of the sysfs directory shows where these numbers are presented:: - - sys/ - |-- *other sysfs base dirs* - | - |-- class - | |-- *other classes of devices* - | | - | `-- tty - | |-- *other tty devices* - | | - | |-- hvcs0 - | | `-- dev - | |-- hvcs1 - | | `-- dev - | |-- hvcs2 - | | `-- dev - | |-- hvcs3 - | | `-- dev - | | - | |-- *other tty devices* - | - |-- *other sysfs base dirs* - -For the above examples the following output is a result of cat'ing the -"dev" entry in the hvcs directory:: - - Pow5:/sys/class/tty/hvcs0/ # cat dev - 254:0 - - Pow5:/sys/class/tty/hvcs1/ # cat dev - 254:1 - - Pow5:/sys/class/tty/hvcs2/ # cat dev - 254:2 - - Pow5:/sys/class/tty/hvcs3/ # cat dev - 254:3 - -The output from reading the "dev" attribute is the char device major and -minor numbers that the tty layer has allocated for this driver's use. Most -systems running hvcs will already have the device entries created or udev -will do it automatically. 
- -Given the example output above, to manually create a /dev/hvcs* node entry -mknod can be used as follows:: - - mknod /dev/hvcs0 c 254 0 - mknod /dev/hvcs1 c 254 1 - mknod /dev/hvcs2 c 254 2 - mknod /dev/hvcs3 c 254 3 - -Using mknod to manually create the device entries makes these device nodes -persistent. Once created they will exist prior to the driver insmod. - -Attempting to connect an application to /dev/hvcs* prior to insertion of -the hvcs module will result in an error message similar to the following:: - - "/dev/hvcs*: No such device". - -NOTE: Just because there is a device node present doesn't mean that there -is a vty-server device configured for that node. - -5. Connection -============= - -Since this driver controls devices that provide a tty interface a user can -interact with the device node entries using any standard tty-interactive -method (e.g. "cat", "dd", "echo"). The intent of this driver however, is -to provide real time console interaction with a Linux partition's console, -which requires the use of applications that provide bi-directional, -interactive I/O with a tty device. - -Applications (e.g. "minicom" and "screen") that act as terminal emulators -or perform terminal type control sequence conversion on the data being -passed through them are NOT acceptable for providing interactive console -I/O. These programs often emulate antiquated terminal types (vt100 and -ANSI) and expect inbound data to take the form of one of these supported -terminal types but they either do not convert, or do not _adequately_ -convert, outbound data into the terminal type of the terminal which invoked -them (though screen makes an attempt and can apparently be configured with -much termcap wrestling.) - -For this reason kermit and cu are two of the recommended applications for -interacting with a Linux console via an hvcs device. These programs simply -act as a conduit for data transfer to and from the tty device. 
They do not -require inbound data to take the form of a particular terminal type, nor do -they cook outbound data to a particular terminal type. - -In order to ensure proper functioning of console applications one must make -sure that once connected to a /dev/hvcs console that the console's $TERM -env variable is set to the exact terminal type of the terminal emulator -used to launch the interactive I/O application. If one is using xterm and -kermit to connect to /dev/hvcs0 when the console prompt becomes available -one should "export TERM=xterm" on the console. This tells ncurses -applications that are invoked from the console that they should output -control sequences that xterm can understand. - -As a precautionary measure an hvcs user should always "exit" from their -session before disconnecting an application such as kermit from the device -node. If this is not done, the next user to connect to the console will -continue using the previous user's logged in session which includes -using the $TERM variable that the previous user supplied. - -Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node -is used to connect to each vty-server adapter. In order to determine which -vty-server adapter is associated with which /dev/hvcs* node a special sysfs -attribute has been added to each vty-server sysfs entry. This entry is -called "index" and showing it reveals an integer that refers to the -/dev/hvcs* entry to use to connect to that device. For instance cating the -index attribute of vty-server adapter 30000004 shows the following:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index - 2 - -This index of '2' means that in order to connect to vty-server adapter -30000004 the user should interact with /dev/hvcs2. - -It should be noted that due to the system hotplug I/O capabilities of a -system the /dev/hvcs* entry that interacts with a particular vty-server -adapter is not guaranteed to remain the same across system reboots. 
Look -in the Q & A section for more on this issue. - -6. Disconnection -================ - -As a security feature to prevent the delivery of stale data to an -unintended target the Power5 system firmware disables the fetching of data -and discards that data when a connection between a vty-server and a vty has -been severed. As an example, when a vty-server is immediately disconnected -from a vty following output of data to the vty the vty adapter may not have -enough time between when it received the data interrupt and when the -connection was severed to fetch the data from firmware before the fetch is -disabled by firmware. - -When hvcs is being used to serve consoles this behavior is not a huge issue -because the adapter stays connected for large amounts of time following -almost all data writes. When hvcs is being used as a tty conduit to tunnel -data between two partitions [see Q & A below] this is a huge problem -because the standard Linux behavior when cat'ing or dd'ing data to a device -is to open the tty, send the data, and then close the tty. If this driver -manually terminated vty-server connections on tty close this would close -the vty-server and vty connection before the target vty has had a chance to -fetch the data. - -Additionally, disconnecting a vty-server and vty only on module removal or -adapter removal is impractical because other vty-servers in other -partitions may require the usage of the target vty at any time. - -Due to this behavioral restriction disconnection of vty-servers from the -connected vty is a manual procedure using a write to a sysfs attribute -outlined below, on the other hand the initial vty-server connection to a -vty is established automatically by this driver. Manual vty-server -connection is never required. - -In order to terminate the connection between a vty-server and vty the -"vterm_state" sysfs attribute within each vty-server's sysfs entry is used. 
-Reading this attribute reveals the current connection state of the -vty-server adapter. A zero means that the vty-server is not connected to a -vty. A one indicates that a connection is active. - -Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM -connection between the vty-server and target vty ONLY if the vterm_state -previously read '1'. The write directive is ignored if the vterm_state -read '0' or if any value other than '0' was written to the vterm_state -attribute. The following example will show the method used for verifying -the vty-server connection status and disconnecting a vty-server connection:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state - 1 - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state - 0 - -All vty-server connections are automatically terminated when the device is -hotplug removed and when the module is removed. - -7. Configuration -================ - -Each vty-server has a sysfs entry in the /sys/devices/vio directory, which -is symlinked in several other sysfs tree directories, notably under the -hvcs driver entry, which looks like the following example:: - - Pow5:/sys/bus/vio/drivers/hvcs # ls - . .. 30000003 30000004 rescan - -By design, firmware notifies the hvcs driver of vty-server lifetimes and -partner vty removals but not the addition of partner vtys. Since an HMC -Super Admin can add partner info dynamically we have provided the hvcs -driver sysfs directory with the "rescan" update attribute which will query -firmware and update the partner info for all the vty-servers that this -driver manages. Writing a '1' to the attribute triggers the update. An -explicit example follows: - - Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan - -Reading the attribute will indicate a state of '1' or '0'. A one indicates -that an update is in process. A zero indicates that an update has -completed or was never executed. 
- -Vty-server entries in this directory are a 32 bit partition unique unit -address that is created by firmware. An example vty-server sysfs entry -looks like the following:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls - . current_vty devspec name partner_vtys - .. index partner_clcs vterm_state - -Each entry is provided, by default with a "name" attribute. Reading the -"name" attribute will reveal the device type as shown in the following -example:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name - vty-server - -Each entry is also provided, by default, with a "devspec" attribute which -reveals the full device specification when read, as shown in the following -example:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec - /vdevice/vty-server@30000004 - -Each vty-server sysfs dir is provided with two read-only attributes that -provide lists of easily parsed partner vty data: "partner_vtys" and -"partner_clcs":: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys - 30000000 - 30000001 - 30000002 - 30000000 - 30000000 - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs - U5112.428.103048A-V3-C0 - U5112.428.103048A-V3-C2 - U5112.428.103048A-V3-C3 - U5112.428.103048A-V4-C0 - U5112.428.103048A-V5-C0 - -Reading partner_vtys returns a list of partner vtys. Vty unit address -numbering is only per-partition-unique so entries will frequently repeat. - -Reading partner_clcs returns a list of "converged location codes" which are -composed of a system serial number followed by "-V*", where the '*' is the -target partition number, and "-C*", where the '*' is the slot of the -adapter. The first vty partner corresponds to the first clc item, the -second vty partner to the second clc item, etc. - -A vty-server can only be connected to a single vty at a time. The entry, -"current_vty" prints the clc of the currently selected partner vty when -read. 
- -The current_vty can be changed by writing a valid partner clc to the entry -as in the following example:: - - Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304 - 8A-V4-C0 > current_vty - -Changing the current_vty when a vty-server is already connected to a vty -does not affect the current connection. The change takes effect when the -currently open connection is freed. - -Information on the "vterm_state" attribute was covered earlier on the -chapter entitled "disconnection". - -8. Questions & Answers: -======================= - -Q: What are the security concerns involving hvcs? - -A: There are three main security concerns: - - 1. The creator of the /dev/hvcs* nodes has the ability to restrict - the access of the device entries to certain users or groups. It - may be best to create a special hvcs group privilege for providing - access to system consoles. - - 2. To provide network security when grabbing the console it is - suggested that the user connect to the console hosting partition - using a secure method, such as SSH or sit at a hardware console. - - 3. Make sure to exit the user session when done with a console or - the next vty-server connection (which may be from another - partition) will experience the previously logged in session. - ---------------------------------------------------------------------------- - -Q: How do I multiplex a console that I grab through hvcs so that other -people can see it: - -A: You can use "screen" to directly connect to the /dev/hvcs* device and -setup a session on your machine with the console group privileges. As -pointed out earlier by default screen doesn't provide the termcap settings -for most terminal emulators to provide adequate character conversion from -term type "screen" to others. This means that curses based programs may -not display properly in screen sessions. - ---------------------------------------------------------------------------- - -Q: Why are the colors all messed up? 
-Q: Why are the control characters acting strange or not working? -Q: Why is the console output all strange and unintelligible? - -A: Please see the preceding section on "Connection" for a discussion of how -applications can affect the display of character control sequences. -Additionally, just because you logged into the console using and xterm -doesn't mean someone else didn't log into the console with the HMC console -(vt320) before you and leave the session logged in. The best thing to do -is to export TERM to the terminal type of your terminal emulator when you -get the console. Additionally make sure to "exit" the console before you -disconnect from the console. This will ensure that the next user gets -their own TERM type set when they login. - ---------------------------------------------------------------------------- - -Q: When I try to CONNECT kermit to an hvcs device I get: -"Sorry, can't open connection: /dev/hvcs*"What is happening? - -A: Some other Power5 console mechanism has a connection to the vty and -isn't giving it up. You can try to force disconnect the consoles from the -HMC by right clicking on the partition and then selecting "close terminal". -Otherwise you have to hunt down the people who have console authority. It -is possible that you already have the console open using another kermit -session and just forgot about it. Please review the console options for -Power5 systems to determine the many ways a system console can be held. - -OR - -A: Another user may not have a connectivity method currently attached to a -/dev/hvcs device but the vterm_state may reveal that they still have the -vty-server connection established. They need to free this using the method -outlined in the section on "Disconnection" in order for others to connect -to the target vty. - -OR - -A: The user profile you are using to execute kermit probably doesn't have -permissions to use the /dev/hvcs* device. 
- -OR - -A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs* -entry still exists (on systems without udev). - -OR - -A: There is not a corresponding vty-server device that maps to an existing -/dev/hvcs* entry. - ---------------------------------------------------------------------------- - -Q: When I try to CONNECT kermit to an hvcs device I get: -"Sorry, write access to UUCP lockfile directory denied." - -A: The /dev/hvcs* entry you have specified doesn't exist where you said it -does? Maybe you haven't inserted the module (on systems with udev). - ---------------------------------------------------------------------------- - -Q: If I already have one Linux partition installed can I use hvcs on said -partition to provide the console for the install of a second Linux -partition? - -A: Yes granted that your are connected to the /dev/hvcs* device using -kermit or cu or some other program that doesn't provide terminal emulation. - ---------------------------------------------------------------------------- - -Q: Can I connect to more than one partition's console at a time using this -driver? - -A: Yes. Of course this means that there must be more than one vty-server -configured for this partition and each must point to a disconnected vty. - ---------------------------------------------------------------------------- - -Q: Does the hvcs driver support dynamic (hotplug) addition of devices? - -A: Yes, if you have dlpar and hotplug enabled for your system and it has -been built into the kernel the hvcs drivers is configured to dynamically -handle additions of new devices and removals of unused devices. - ---------------------------------------------------------------------------- - -Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter -after a reboot. What happened? - -A: Assignment of vty-server adapters to /dev/hvcs* entries is always done -in the order that the adapters are exposed. 
Due to hotplug capabilities of -this driver assignment of hotplug added vty-servers may be in a different -order than how they would be exposed on module load. Rebooting or -reloading the module after dynamic addition may result in the /dev/hvcs* -and vty-server coupling changing if a vty-server adapter was added in a -slot between two other vty-server adapters. Refer to the section above -on how to determine which vty-server goes with which /dev/hvcs* node. -Hint; look at the sysfs "index" attribute for the vty-server. - ---------------------------------------------------------------------------- - -Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty -device on that partition as the other end of the pipe? - -A: Yes, on Power5 platforms the hvc_console driver provides a tty interface -for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console). -In order to get a tty conduit working between the two partitions the HMC -Super Admin must create an additional "serial server" for the target -partition with the HMC gui which will show up as /dev/hvc* when the target -partition is rebooted. - -The HMC Super Admin then creates an additional "serial client" for the -current partition and points this at the target partition's newly created -"serial server" adapter (remember the slot). This shows up as an -additional /dev/hvcs* device. - -Now a program on the target system can be configured to read or write to -/dev/hvc* and another program on the current partition can be configured to -read or write to /dev/hvcs*. Now you have a tty conduit between two -partitions. - ---------------------------------------------------------------------------- - -9. 
Reporting Bugs: -================== - -The proper channel for reporting bugs is either through the Linux OS -distribution company that provided your OS or by posting issues to the -PowerPC development mailing list at: - -linuxppc-dev@lists.ozlabs.org - -This request is to provide a documented and searchable public exchange -of the problems and solutions surrounding this driver for the benefit of -all users. diff --git a/Documentation/powerpc/imc.rst b/Documentation/powerpc/imc.rst deleted file mode 100644 index 633bcee7dc..0000000000 --- a/Documentation/powerpc/imc.rst +++ /dev/null @@ -1,199 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. _imc: - -=================================== -IMC (In-Memory Collection Counters) -=================================== - -Anju T Sudhakar, 10 May 2019 - -.. contents:: - :depth: 3 - - -Basic overview -============== - -IMC (In-Memory collection counters) is a hardware monitoring facility that -collects large numbers of hardware performance events at Nest level (these are -on-chip but off-core), Core level and Thread level. - -The Nest PMU counters are handled by a Nest IMC microcode which runs in the OCC -(On-Chip Controller) complex. The microcode collects the counter data and moves -the nest IMC counter data to memory. - -The Core and Thread IMC PMU counters are handled in the core. Core level PMU -counters give us the IMC counters' data per core and thread level PMU counters -give us the IMC counters' data per CPU thread. - -OPAL obtains the IMC PMU and supported events information from the IMC Catalog -and passes on to the kernel via the device tree. The event's information -contains: - -- Event name -- Event Offset -- Event description - -and possibly also: - -- Event scale -- Event unit - -Some PMUs may have a common scale and unit values for all their supported -events. For those cases, the scale and unit properties for those events must be -inherited from the PMU. 
- -The event offset in the memory is where the counter data gets accumulated. - -IMC catalog is available at: - https://github.com/open-power/ima-catalog - -The kernel discovers the IMC counters information in the device tree at the -`imc-counters` device node which has a compatible field -`ibm,opal-in-memory-counters`. From the device tree, the kernel parses the PMUs -and their event's information and register the PMU and its attributes in the -kernel. - -IMC example usage -================= - -.. code-block:: sh - - # perf list - [...] - nest_mcs01/PM_MCS01_64B_RD_DISP_PORT01/ [Kernel PMU event] - nest_mcs01/PM_MCS01_64B_RD_DISP_PORT23/ [Kernel PMU event] - [...] - core_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] - core_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] - [...] - thread_imc/CPM_0THRD_NON_IDLE_PCYC/ [Kernel PMU event] - thread_imc/CPM_1THRD_NON_IDLE_INST/ [Kernel PMU event] - -To see per chip data for nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/: - -.. code-block:: sh - - # ./perf stat -e "nest_mcs01/PM_MCS01_64B_WR_DISP_PORT01/" -a --per-socket - -To see non-idle instructions for core 0: - -.. code-block:: sh - - # ./perf stat -e "core_imc/CPM_NON_IDLE_INST/" -C 0 -I 1000 - -To see non-idle instructions for a "make": - -.. code-block:: sh - - # ./perf stat -e "thread_imc/CPM_NON_IDLE_PCYC/" make - - -IMC Trace-mode -=============== - -POWER9 supports two modes for IMC which are the Accumulation mode and Trace -mode. In Accumulation mode, event counts are accumulated in system Memory. -Hypervisor then reads the posted counts periodically or when requested. In IMC -Trace mode, the 64 bit trace SCOM value is initialized with the event -information. The CPMCxSEL and CPMC_LOAD in the trace SCOM, specifies the event -to be monitored and the sampling duration. On each overflow in the CPMCxSEL, -hardware snapshots the program counter along with event counts and writes into -memory pointed by LDBAR. 
- -LDBAR is a 64 bit special purpose per thread register, it has bits to indicate -whether hardware is configured for accumulation or trace mode. - -LDBAR Register Layout ---------------------- - - +-------+----------------------+ - | 0 | Enable/Disable | - +-------+----------------------+ - | 1 | 0: Accumulation Mode | - | +----------------------+ - | | 1: Trace Mode | - +-------+----------------------+ - | 2:3 | Reserved | - +-------+----------------------+ - | 4-6 | PB scope | - +-------+----------------------+ - | 7 | Reserved | - +-------+----------------------+ - | 8:50 | Counter Address | - +-------+----------------------+ - | 51:63 | Reserved | - +-------+----------------------+ - -TRACE_IMC_SCOM bit representation ---------------------------------- - - +-------+------------+ - | 0:1 | SAMPSEL | - +-------+------------+ - | 2:33 | CPMC_LOAD | - +-------+------------+ - | 34:40 | CPMC1SEL | - +-------+------------+ - | 41:47 | CPMC2SEL | - +-------+------------+ - | 48:50 | BUFFERSIZE | - +-------+------------+ - | 51:63 | RESERVED | - +-------+------------+ - -CPMC_LOAD contains the sampling duration. SAMPSEL and CPMCxSEL determines the -event to count. BUFFERSIZE indicates the memory range. On each overflow, -hardware snapshots the program counter along with event counts and updates the -memory and reloads the CMPC_LOAD value for the next sampling duration. IMC -hardware does not support exceptions, so it quietly wraps around if memory -buffer reaches the end. - -*Currently the event monitored for trace-mode is fixed as cycle.* - -Trace IMC example usage -======================= - -.. code-block:: sh - - # perf list - [....] - trace_imc/trace_cycles/ [Kernel PMU event] - -To record an application/process with trace-imc event: - -.. 
code-block:: sh - - # perf record -e trace_imc/trace_cycles/ yes > /dev/null - [ perf record: Woken up 1 times to write data ] - [ perf record: Captured and wrote 0.012 MB perf.data (21 samples) ] - -The `perf.data` generated, can be read using perf report. - -Benefits of using IMC trace-mode -================================ - -PMI (Performance Monitoring Interrupts) interrupt handling is avoided, since IMC -trace mode snapshots the program counter and updates to the memory. And this -also provide a way for the operating system to do instruction sampling in real -time without PMI processing overhead. - -Performance data using `perf top` with and without trace-imc event. - -PMI interrupts count when `perf top` command is executed without trace-imc event. - -.. code-block:: sh - - # grep PMI /proc/interrupts - PMI: 0 0 0 0 Performance monitoring interrupts - # ./perf top - ... - # grep PMI /proc/interrupts - PMI: 39735 8710 17338 17801 Performance monitoring interrupts - # ./perf top -e trace_imc/trace_cycles/ - ... - # grep PMI /proc/interrupts - PMI: 39735 8710 17338 17801 Performance monitoring interrupts - - -That is, the PMI interrupt counts do not increment when using the `trace_imc` event. diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst deleted file mode 100644 index a508347984..0000000000 --- a/Documentation/powerpc/index.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -======= -powerpc -======= - -.. toctree:: - :maxdepth: 1 - - associativity - booting - bootwrapper - cpu_families - cpu_features - cxl - cxlflash - dawr-power9 - dexcr - dscr - eeh-pci-error-recovery - elf_hwcaps - elfnote - firmware-assisted-dump - hvcs - imc - isa-versions - kaslr-booke32 - mpc52xx - papr_hcalls - pci_iov_resource_on_powernv - pmu-ebb - ptrace - qe_firmware - syscall64-abi - transactional_memory - ultravisor - vas-api - vcpudispatch_stats - vmemmap_dedup - - features - -.. 
only:: subproject and html - - Indices - ======= - - * :ref:`genindex` diff --git a/Documentation/powerpc/isa-versions.rst b/Documentation/powerpc/isa-versions.rst deleted file mode 100644 index a8d6b6028b..0000000000 --- a/Documentation/powerpc/isa-versions.rst +++ /dev/null @@ -1,101 +0,0 @@ -========================== -CPU to ISA Version Mapping -========================== - -Mapping of some CPU versions to relevant ISA versions. - -Note Power4 and Power4+ are not supported. - -========= ==================================================================== -CPU Architecture version -========= ==================================================================== -Power10 Power ISA v3.1 -Power9 Power ISA v3.0B -Power8 Power ISA v2.07 -e6500 Power ISA v2.06 with some exceptions -e5500 Power ISA v2.06 with some exceptions, no Altivec -Power7 Power ISA v2.06 -Power6 Power ISA v2.05 -PA6T Power ISA v2.04 -Cell PPU - Power ISA v2.02 with some minor exceptions - - Plus Altivec/VMX ~= 2.03 -Power5++ Power ISA v2.04 (no VMX) -Power5+ Power ISA v2.03 -Power5 - PowerPC User Instruction Set Architecture Book I v2.02 - - PowerPC Virtual Environment Architecture Book II v2.02 - - PowerPC Operating Environment Architecture Book III v2.02 -PPC970 - PowerPC User Instruction Set Architecture Book I v2.01 - - PowerPC Virtual Environment Architecture Book II v2.01 - - PowerPC Operating Environment Architecture Book III v2.01 - - Plus Altivec/VMX ~= 2.03 -Power4+ - PowerPC User Instruction Set Architecture Book I v2.01 - - PowerPC Virtual Environment Architecture Book II v2.01 - - PowerPC Operating Environment Architecture Book III v2.01 -Power4 - PowerPC User Instruction Set Architecture Book I v2.00 - - PowerPC Virtual Environment Architecture Book II v2.00 - - PowerPC Operating Environment Architecture Book III v2.00 -========= ==================================================================== - - -Key Features ------------- - -========== ================== -CPU VMX (aka. 
Altivec) -========== ================== -Power10 Yes -Power9 Yes -Power8 Yes -e6500 Yes -e5500 No -Power7 Yes -Power6 Yes -PA6T Yes -Cell PPU Yes -Power5++ No -Power5+ No -Power5 No -PPC970 Yes -Power4+ No -Power4 No -========== ================== - -========== ==== -CPU VSX -========== ==== -Power10 Yes -Power9 Yes -Power8 Yes -e6500 No -e5500 No -Power7 Yes -Power6 No -PA6T No -Cell PPU No -Power5++ No -Power5+ No -Power5 No -PPC970 No -Power4+ No -Power4 No -========== ==== - -========== ==================================== -CPU Transactional Memory -========== ==================================== -Power10 No (* see Power ISA v3.1, "Appendix A. Notes on the Removal of Transactional Memory from the Architecture") -Power9 Yes (* see transactional_memory.txt) -Power8 Yes -e6500 No -e5500 No -Power7 No -Power6 No -PA6T No -Cell PPU No -Power5++ No -Power5+ No -Power5 No -PPC970 No -Power4+ No -Power4 No -========== ==================================== diff --git a/Documentation/powerpc/kasan.txt b/Documentation/powerpc/kasan.txt deleted file mode 100644 index a4f647e4ff..0000000000 --- a/Documentation/powerpc/kasan.txt +++ /dev/null @@ -1,58 +0,0 @@ -KASAN is supported on powerpc on 32-bit and Radix 64-bit only. - -32 bit support -============== - -KASAN is supported on both hash and nohash MMUs on 32-bit. - -The shadow area sits at the top of the kernel virtual memory space above the -fixmap area and occupies one eighth of the total kernel virtual memory space. - -Instrumentation of the vmalloc area is optional, unless built with modules, -in which case it is required. - -64 bit support -============== - -Currently, only the radix MMU is supported. There have been versions for hash -and Book3E processors floating around on the mailing list, but nothing has been -merged. - -KASAN support on Book3S is a bit tricky to get right: - - - It would be good to support inline instrumentation so as to be able to catch - stack issues that cannot be caught with outline mode. 
- - - Inline instrumentation requires a fixed offset. - - - Book3S runs code with translations off ("real mode") during boot, including a - lot of generic device-tree parsing code which is used to determine MMU - features. - - - Some code - most notably a lot of KVM code - also runs with translations off - after boot. - - - Therefore any offset has to point to memory that is valid with - translations on or off. - -One approach is just to give up on inline instrumentation. This way boot-time -checks can be delayed until after the MMU is set is up, and we can just not -instrument any code that runs with translations off after booting. This is the -current approach. - -To avoid this limitation, the KASAN shadow would have to be placed inside the -linear mapping, using the same high-bits trick we use for the rest of the linear -mapping. This is tricky: - - - We'd like to place it near the start of physical memory. In theory we can do - this at run-time based on how much physical memory we have, but this requires - being able to arbitrarily relocate the kernel, which is basically the tricky - part of KASLR. Not being game to implement both tricky things at once, this - is hopefully something we can revisit once we get KASLR for Book3S. - - - Alternatively, we can place the shadow at the _end_ of memory, but this - requires knowing how much contiguous physical memory a system has _at compile - time_. This is a big hammer, and has some unfortunate consequences: inablity - to handle discontiguous physical memory, total failure to boot on machines - with less memory than specified, and that machines with more memory than - specified can't use it. This was deemed unacceptable. diff --git a/Documentation/powerpc/kaslr-booke32.rst b/Documentation/powerpc/kaslr-booke32.rst deleted file mode 100644 index 5681c1d1b6..0000000000 --- a/Documentation/powerpc/kaslr-booke32.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
SPDX-License-Identifier: GPL-2.0 - -=========================== -KASLR for Freescale BookE32 -=========================== - -The word KASLR stands for Kernel Address Space Layout Randomization. - -This document tries to explain the implementation of the KASLR for -Freescale BookE32. KASLR is a security feature that deters exploit -attempts relying on knowledge of the location of kernel internals. - -Since CONFIG_RELOCATABLE has already supported, what we need to do is -map or copy kernel to a proper place and relocate. Freescale Book-E -parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1 -entries are not suitable to map the kernel directly in a randomized -region, so we chose to copy the kernel to a proper place and restart to -relocate. - -Entropy is derived from the banner and timer base, which will change every -build and boot. This not so much safe so additionally the bootloader may -pass entropy via the /chosen/kaslr-seed node in device tree. - -We will use the first 512M of the low memory to randomize the kernel -image. The memory will be split in 64M zones. We will use the lower 8 -bit of the entropy to decide the index of the 64M zone. Then we chose a -16K aligned offset inside the 64M zone to put the kernel in:: - - KERNELBASE - - |--> 64M <--| - | | - +---------------+ +----------------+---------------+ - | |....| |kernel| | | - +---------------+ +----------------+---------------+ - | | - |-----> offset <-----| - - kernstart_virt_addr - -To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enabled and you -want to disable it at runtime, add "nokaslr" to the kernel cmdline. 
diff --git a/Documentation/powerpc/mpc52xx.rst b/Documentation/powerpc/mpc52xx.rst deleted file mode 100644 index 5243b1763f..0000000000 --- a/Documentation/powerpc/mpc52xx.rst +++ /dev/null @@ -1,43 +0,0 @@ -============================= -Linux 2.6.x on MPC52xx family -============================= - -For the latest info, go to https://www.246tNt.com/mpc52xx/ - -To compile/use : - - - U-Boot:: - - # tftpboot 200000 uImage - => tftpboot 400000 pRamdisk - => bootm 200000 400000 - - - DBug:: - - # dn -i zImage.initrd.lite5200 - - -Some remarks: - - - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100 - is not supported, and I'm not sure anyone is interested in working on it - so. I didn't took 5xxx because there's apparently a lot of 5xxx that have - nothing to do with the MPC5200. I also included the 'MPC' for the same - reason. - - Of course, I inspired myself from the 2.4 port. If you think I forgot to - mention you/your company in the copyright of some code, I'll correct it - ASAP. diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst deleted file mode 100644 index 80d2c0aada..0000000000 --- a/Documentation/powerpc/papr_hcalls.rst +++ /dev/null @@ -1,302 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=========================== -Hypercall Op-codes (hcalls) -=========================== - -Overview -========= - -Virtualization on 64-bit Power Book3S Platforms is based on the PAPR -specification [1]_ which describes the run-time environment for a guest -operating system and how it should interact with the hypervisor for -privileged operations. Currently there are two PAPR compliant hypervisors: - -- **IBM PowerVM (PHYP)**: IBM's proprietary hypervisor that supports AIX, - IBM-i and Linux as supported guests (termed as Logical Partitions - or LPARS). It supports the full PAPR specification. - -- **Qemu/KVM**: Supports PPC64 linux guests running on a PPC64 linux host. 
- Though it only implements a subset of PAPR specification called LoPAPR [2]_. - -On PPC64 arch a guest kernel running on top of a PAPR hypervisor is called -a *pSeries guest*. A pseries guest runs in a supervisor mode (HV=0) and must -issue hypercalls to the hypervisor whenever it needs to perform an action -that is hypervisor privileged [3]_ or for other services managed by the -hypervisor. - -Hence a Hypercall (hcall) is essentially a request by the pseries guest -asking hypervisor to perform a privileged operation on behalf of the guest. The -guest issues a with necessary input operands. The hypervisor after performing -the privilege operation returns a status code and output operands back to the -guest. - -HCALL ABI -========= -The ABI specification for a hcall between a pseries guest and PAPR hypervisor -is covered in section 14.5.3 of ref [2]_. Switch to the Hypervisor context is -done via the instruction **HVCS** that expects the Opcode for hcall is set in *r3* -and any in-arguments for the hcall are provided in registers *r4-r12*. If values -have to be passed through a memory buffer, the data stored in that buffer should be -in Big-endian byte order. - -Once control returns back to the guest after hypervisor has serviced the -'HVCS' instruction the return value of the hcall is available in *r3* and any -out values are returned in registers *r4-r12*. Again like in case of in-arguments, -any out values stored in a memory buffer will be in Big-endian byte order. - -Powerpc arch code provides convenient wrappers named **plpar_hcall_xxx** defined -in a arch specific header [4]_ to issue hcalls from the linux kernel -running as pseries guest. - -Register Conventions -==================== - -Any hcall should follow same register convention as described in section 2.2.1.1 -of "64-Bit ELF V2 ABI Specification: Power Architecture"[5]_. 
Table below -summarizes these conventions: - -+----------+----------+-------------------------------------------+ -| Register |Volatile | Purpose | -| Range |(Y/N) | | -+==========+==========+===========================================+ -| r0 | Y | Optional-usage | -+----------+----------+-------------------------------------------+ -| r1 | N | Stack Pointer | -+----------+----------+-------------------------------------------+ -| r2 | N | TOC | -+----------+----------+-------------------------------------------+ -| r3 | Y | hcall opcode/return value | -+----------+----------+-------------------------------------------+ -| r4-r10 | Y | in and out values | -+----------+----------+-------------------------------------------+ -| r11 | Y | Optional-usage/Environmental pointer | -+----------+----------+-------------------------------------------+ -| r12 | Y | Optional-usage/Function entry address at | -| | | global entry point | -+----------+----------+-------------------------------------------+ -| r13 | N | Thread-Pointer | -+----------+----------+-------------------------------------------+ -| r14-r31 | N | Local Variables | -+----------+----------+-------------------------------------------+ -| LR | Y | Link Register | -+----------+----------+-------------------------------------------+ -| CTR | Y | Loop Counter | -+----------+----------+-------------------------------------------+ -| XER | Y | Fixed-point exception register. | -+----------+----------+-------------------------------------------+ -| CR0-1 | Y | Condition register fields. | -+----------+----------+-------------------------------------------+ -| CR2-4 | N | Condition register fields. | -+----------+----------+-------------------------------------------+ -| CR5-7 | Y | Condition register fields. 
| -+----------+----------+-------------------------------------------+ -| Others | N | | -+----------+----------+-------------------------------------------+ - -DRC & DRC Indexes -================= -:: - - DR1 Guest - +--+ +------------+ +---------+ - | | <----> | | | User | - +--+ DRC1 | | DRC | Space | - | PAPR | Index +---------+ - DR2 | Hypervisor | | | - +--+ | | <-----> | Kernel | - | | <----> | | Hcall | | - +--+ DRC2 +------------+ +---------+ - -PAPR hypervisor terms shared hardware resources like PCI devices, NVDIMMs etc -available for use by LPARs as Dynamic Resource (DR). When a DR is allocated to -an LPAR, PHYP creates a data-structure called Dynamic Resource Connector (DRC) -to manage LPAR access. An LPAR refers to a DRC via an opaque 32-bit number -called DRC-Index. The DRC-index value is provided to the LPAR via device-tree -where its present as an attribute in the device tree node associated with the -DR. - -HCALL Return-values -=================== - -After servicing the hcall, hypervisor sets the return-value in *r3* indicating -success or failure of the hcall. In case of a failure an error code indicates -the cause for error. These codes are defined and documented in arch specific -header [4]_. - -In some cases a hcall can potentially take a long time and need to be issued -multiple times in order to be completely serviced. These hcalls will usually -accept an opaque value *continue-token* within there argument list and a -return value of *H_CONTINUE* indicates that hypervisor hasn't still finished -servicing the hcall yet. - -To make such hcalls the guest need to set *continue-token == 0* for the -initial call and use the hypervisor returned value of *continue-token* -for each subsequent hcall until hypervisor returns a non *H_CONTINUE* -return value. - -HCALL Op-codes -============== - -Below is a partial list of HCALLs that are supported by PHYP. 
For the -corresponding opcode values please look into the arch specific header [4]_: - -**H_SCM_READ_METADATA** - -| Input: *drcIndex, offset, buffer-address, numBytesToRead* -| Out: *numBytesRead* -| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_Hardware* - -Given a DRC Index of an NVDIMM, read N-bytes from the metadata area -associated with it, at a specified offset and copy it to provided buffer. -The metadata area stores configuration information such as label information, -bad-blocks etc. The metadata area is located out-of-band of NVDIMM storage -area hence a separate access semantics is provided. - -**H_SCM_WRITE_METADATA** - -| Input: *drcIndex, offset, data, numBytesToWrite* -| Out: *None* -| Return Value: *H_Success, H_Parameter, H_P2, H_P4, H_Hardware* - -Given a DRC Index of an NVDIMM, write N-bytes to the metadata area -associated with it, at the specified offset and from the provided buffer. - -**H_SCM_BIND_MEM** - -| Input: *drcIndex, startingScmBlockIndex, numScmBlocksToBind,* -| *targetLogicalMemoryAddress, continue-token* -| Out: *continue-token, targetLogicalMemoryAddress, numScmBlocksToBound* -| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_P4, H_Overlap,* -| *H_Too_Big, H_P5, H_Busy* - -Given a DRC-Index of an NVDIMM, map a continuous SCM blocks range -*(startingScmBlockIndex, startingScmBlockIndex+numScmBlocksToBind)* to the guest -at *targetLogicalMemoryAddress* within guest physical address space. In -case *targetLogicalMemoryAddress == 0xFFFFFFFF_FFFFFFFF* then hypervisor -assigns a target address to the guest. The HCALL can fail if the Guest has -an active PTE entry to the SCM block being bound. 
- -**H_SCM_UNBIND_MEM** -| Input: drcIndex, startingScmLogicalMemoryAddress, numScmBlocksToUnbind -| Out: numScmBlocksUnbound -| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Overlap,* -| *H_Busy, H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* - -Given a DRC-Index of an NVDimm, unmap *numScmBlocksToUnbind* SCM blocks starting -at *startingScmLogicalMemoryAddress* from guest physical address space. The -HCALL can fail if the Guest has an active PTE entry to the SCM block being -unbound. - -**H_SCM_QUERY_BLOCK_MEM_BINDING** - -| Input: *drcIndex, scmBlockIndex* -| Out: *Guest-Physical-Address* -| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* - -Given a DRC-Index and an SCM Block index return the guest physical address to -which the SCM block is mapped to. - -**H_SCM_QUERY_LOGICAL_MEM_BINDING** - -| Input: *Guest-Physical-Address* -| Out: *drcIndex, scmBlockIndex* -| Return Value: *H_Success, H_Parameter, H_P2, H_NotFound* - -Given a guest physical address return which DRC Index and SCM block is mapped -to that address. - -**H_SCM_UNBIND_ALL** - -| Input: *scmTargetScope, drcIndex* -| Out: *None* -| Return Value: *H_Success, H_Parameter, H_P2, H_P3, H_In_Use, H_Busy,* -| *H_LongBusyOrder1mSec, H_LongBusyOrder10mSec* - -Depending on the Target scope unmap all SCM blocks belonging to all NVDIMMs -or all SCM blocks belonging to a single NVDIMM identified by its drcIndex -from the LPAR memory. - -**H_SCM_HEALTH** - -| Input: drcIndex -| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)* -| Return Value: *H_Success, H_Parameter, H_Hardware* - -Given a DRC Index return the info on predictive failure and overall health of -the PMEM device. The asserted bits in the health-bitmap indicate one or more states -(described in table below) of the PMEM device and health-bit-valid-bitmap indicate -which bits in health-bitmap are valid. 
The bits are reported in -reverse bit ordering for example a value of 0xC400000000000000 -indicates bits 0, 1, and 5 are valid. - -Health Bitmap Flags: - -+------+-----------------------------------------------------------------------+ -| Bit | Definition | -+======+=======================================================================+ -| 00 | PMEM device is unable to persist memory contents. | -| | If the system is powered down, nothing will be saved. | -+------+-----------------------------------------------------------------------+ -| 01 | PMEM device failed to persist memory contents. Either contents were | -| | not saved successfully on power down or were not restored properly on | -| | power up. | -+------+-----------------------------------------------------------------------+ -| 02 | PMEM device contents are persisted from previous IPL. The data from | -| | the last boot were successfully restored. | -+------+-----------------------------------------------------------------------+ -| 03 | PMEM device contents are not persisted from previous IPL. There was no| -| | data to restore from the last boot. | -+------+-----------------------------------------------------------------------+ -| 04 | PMEM device memory life remaining is critically low | -+------+-----------------------------------------------------------------------+ -| 05 | PMEM device will be garded off next IPL due to failure | -+------+-----------------------------------------------------------------------+ -| 06 | PMEM device contents cannot persist due to current platform health | -| | status. A hardware failure may prevent data from being saved or | -| | restored. 
| -+------+-----------------------------------------------------------------------+ -| 07 | PMEM device is unable to persist memory contents in certain conditions| -+------+-----------------------------------------------------------------------+ -| 08 | PMEM device is encrypted | -+------+-----------------------------------------------------------------------+ -| 09 | PMEM device has successfully completed a requested erase or secure | -| | erase procedure. | -+------+-----------------------------------------------------------------------+ -|10:63 | Reserved / Unused | -+------+-----------------------------------------------------------------------+ - -**H_SCM_PERFORMANCE_STATS** - -| Input: drcIndex, resultBuffer Addr -| Out: None -| Return Value: *H_Success, H_Parameter, H_Unsupported, H_Hardware, H_Authority, H_Privilege* - -Given a DRC Index collect the performance statistics for NVDIMM and copy them -to the resultBuffer. - -**H_SCM_FLUSH** - -| Input: *drcIndex, continue-token* -| Out: *continue-token* -| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY* - -Given a DRC Index Flush the data to backend NVDIMM device. - -The hcall returns H_BUSY when the flush takes longer time and the hcall needs -to be issued multiple times in order to be completely serviced. The -*continue-token* from the output to be passed in the argument list of -subsequent hcalls to the hypervisor until the hcall is completely serviced -at which point H_SUCCESS or other error is returned by the hypervisor. - -References -========== -.. [1] "Power Architecture Platform Reference" - https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference -.. [2] "Linux on Power Architecture Platform Reference" - https://members.openpowerfoundation.org/document/dl/469 -.. [3] "Definitions and Notation" Book III-Section 14.5.3 - https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 -.. [4] arch/powerpc/include/asm/hvcall.h -.. 
[5] "64-Bit ELF V2 ABI Specification: Power Architecture" - https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.rst b/Documentation/powerpc/pci_iov_resource_on_powernv.rst deleted file mode 100644 index f5a5793e16..0000000000 --- a/Documentation/powerpc/pci_iov_resource_on_powernv.rst +++ /dev/null @@ -1,312 +0,0 @@ -=================================================== -PCI Express I/O Virtualization Resource on Powerenv -=================================================== - -Wei Yang - -Benjamin Herrenschmidt - -Bjorn Helgaas - -26 Aug 2014 - -This document describes the requirement from hardware for PCI MMIO resource -sizing and assignment on PowerKVM and how generic PCI code handles this -requirement. The first two sections describe the concepts of Partitionable -Endpoints and the implementation on P8 (IODA2). The next two sections talks -about considerations on enabling SRIOV on IODA2. - -1. Introduction to Partitionable Endpoints -========================================== - -A Partitionable Endpoint (PE) is a way to group the various resources -associated with a device or a set of devices to provide isolation between -partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism -to freeze a device that is causing errors in order to limit the possibility -of propagation of bad data. - -There is thus, in HW, a table of PE states that contains a pair of "frozen" -state bits (one for MMIO and one for DMA, they get set together but can be -cleared independently) for each PE. - -When a PE is frozen, all stores in any direction are dropped and all loads -return all 1's value. MSIs are also blocked. There's a bit more state that -captures things like the details of the error that caused the freeze etc., but -that's not critical. - -The interesting part is how the various PCIe transactions (MMIO, DMA, ...) -are matched to their corresponding PEs. 
- -The following section provides a rough description of what we have on P8 -(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB -is a completely separate HW entity that replicates the entire logic, so has -its own set of PEs, etc. - -2. Implementation of Partitionable Endpoints on P8 (IODA2) -========================================================== - -P8 supports up to 256 Partitionable Endpoints per PHB. - - * Inbound - - For DMA, MSIs and inbound PCIe error messages, we have a table (in - memory but accessed in HW by the chip) that provides a direct - correspondence between a PCIe RID (bus/dev/fn) with a PE number. - We call this the RTT. - - - For DMA we then provide an entire address space for each PE that can - contain two "windows", depending on the value of PCI address bit 59. - Each window can be configured to be remapped via a "TCE table" (IOMMU - translation table), which has various configurable characteristics - not described here. - - - For MSIs, we have two windows in the address space (one at the top of - the 32-bit space and one much higher) which, via a combination of the - address and MSI value, will result in one of the 2048 interrupts per - bridge being triggered. There's a PE# in the interrupt controller - descriptor table as well which is compared with the PE# obtained from - the RTT to "authorize" the device to emit that specific interrupt. - - - Error messages just use the RTT. - - * Outbound. That's where the tricky part is. - - Like other PCI host bridges, the Power8 IODA2 PHB supports "windows" - from the CPU address space to the PCI address space. There is one M32 - window and sixteen M64 windows. They have different characteristics. - First what they have in common: they forward a configurable portion of - the CPU address space to the PCIe bus and must be naturally aligned - power of two in size. The rest is different: - - - The M32 window: - - * Is limited to 4GB in size. 
- - * Drops the top bits of the address (above the size) and replaces - them with a configurable value. This is typically used to generate - 32-bit PCIe accesses. We configure that window at boot from FW and - don't touch it from Linux; it's usually set to forward a 2GB - portion of address space from the CPU to PCIe - 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually - reserved for MSIs but this is not a problem at this point; we just - need to ensure Linux doesn't assign anything there, the M32 logic - ignores that however and will forward in that space if we try). - - * It is divided into 256 segments of equal size. A table in the chip - maps each segment to a PE#. That allows portions of the MMIO space - to be assigned to PEs on a segment granularity. For a 2GB window, - the segment granularity is 2GB/256 = 8MB. - - Now, this is the "main" window we use in Linux today (excluding - SR-IOV). We basically use the trick of forcing the bridge MMIO windows - onto a segment alignment/granularity so that the space behind a bridge - can be assigned to a PE. - - Ideally we would like to be able to have individual functions in PEs - but that would mean using a completely different address allocation - scheme where individual function BARs can be "grouped" to fit in one or - more segments. - - - The M64 windows: - - * Must be at least 256MB in size. - - * Do not translate addresses (the address on PCIe is the same as the - address on the PowerBus). There is a way to also set the top 14 - bits which are not conveyed by PowerBus but we don't use this. - - * Can be configured to be segmented. When not segmented, we can - specify the PE# for the entire window. When segmented, a window - has 256 segments; however, there is no table for mapping a segment - to a PE#. The segment number *is* the PE#. - - * Support overlaps. If an address is covered by multiple windows, - there's a defined ordering for which window applies. 
- - We have code (fairly new compared to the M32 stuff) that exploits that - for large BARs in 64-bit space: - - We configure an M64 window to cover the entire region of address space - that has been assigned by FW for the PHB (about 64GB, ignore the space - for the M32, it comes out of a different "reserve"). We configure it - as segmented. - - Then we do the same thing as with M32, using the bridge alignment - trick, to match to those giant segments. - - Since we cannot remap, we have two additional constraints: - - - We do the PE# allocation *after* the 64-bit space has been assigned - because the addresses we use directly determine the PE#. We then - update the M32 PE# for the devices that use both 32-bit and 64-bit - spaces or assign the remaining PE# to 32-bit only devices. - - - We cannot "group" segments in HW, so if a device ends up using more - than one segment, we end up with more than one PE#. There is a HW - mechanism to make the freeze state cascade to "companion" PEs but - that only works for PCIe error messages (typically used so that if - you freeze a switch, it freezes all its children). So we do it in - SW. We lose a bit of effectiveness of EEH in that case, but that's - the best we found. So when any of the PEs freezes, we freeze the - other ones for that "domain". We thus introduce the concept of - "master PE" which is the one used for DMA, MSIs, etc., and "secondary - PEs" that are used for the remaining M64 segments. - - We would like to investigate using additional M64 windows in "single - PE" mode to overlay over specific BARs to work around some of that, for - example for devices with very large BARs, e.g., GPUs. It would make - sense, but we haven't done it yet. - -3. Considerations for SR-IOV on PowerKVM -======================================== - - * SR-IOV Background - - The PCIe SR-IOV feature allows a single Physical Function (PF) to - support several Virtual Functions (VFs). 
Registers in the PF's SR-IOV - Capability control the number of VFs and whether they are enabled. - - When VFs are enabled, they appear in Configuration Space like normal - PCI devices, but the BARs in VF config space headers are unusual. For - a non-VF device, software uses BARs in the config space header to - discover the BAR sizes and assign addresses for them. For VF devices, - software uses VF BAR registers in the *PF* SR-IOV Capability to - discover sizes and assign addresses. The BARs in the VF's config space - header are read-only zeros. - - When a VF BAR in the PF SR-IOV Capability is programmed, it sets the - base address for all the corresponding VF(n) BARs. For example, if the - PF SR-IOV Capability is programmed to enable eight VFs, and it has a - 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region. - This region is divided into eight contiguous 1MB regions, each of which - is a BAR0 for one of the VFs. Note that even though the VF BAR - describes an 8MB region, the alignment requirement is for a single VF, - i.e., 1MB in this example. - - There are several strategies for isolating VFs in PEs: - - - M32 window: There's one M32 window, and it is split into 256 - equally-sized segments. The finest granularity possible is a 256MB - window with 1MB segments. VF BARs that are 1MB or larger could be - mapped to separate PEs in this window. Each segment can be - individually mapped to a PE via the lookup table, so this is quite - flexible, but it works best when all the VF BARs are the same size. If - they are different sizes, the entire window has to be small enough that - the segment size matches the smallest VF BAR, which means larger VF - BARs span several segments. - - - Non-segmented M64 window: A non-segmented M64 window is mapped entirely - to a single PE, so it could only isolate one VF. 
- - - Single segmented M64 windows: A segmented M64 window could be used just - like the M32 window, but the segments can't be individually mapped to - PEs (the segment number is the PE#), so there isn't as much - flexibility. A VF with multiple BARs would have to be in a "domain" of - multiple PEs, which is not as well isolated as a single PE. - - - Multiple segmented M64 windows: As usual, each window is split into 256 - equally-sized segments, and the segment number is the PE#. But if we - use several M64 windows, they can be set to different base addresses - and different segment sizes. If we have VFs that each have a 1MB BAR - and a 32MB BAR, we could use one M64 window to assign 1MB segments and - another M64 window to assign 32MB segments. - - Finally, the plan to use M64 windows for SR-IOV, which will be described - more in the next two sections. For a given VF BAR, we need to - effectively reserve the entire 256 segments (256 * VF BAR size) and - position the VF BAR to start at the beginning of a free range of - segments/PEs inside that M64 window. - - The goal is of course to be able to give a separate PE for each VF. - - The IODA2 platform has 16 M64 windows, which are used to map MMIO - range to PE#. Each M64 window defines one MMIO range and this range is - divided into 256 segments, with each segment corresponding to one PE. - - We decide to leverage this M64 window to map VFs to individual PEs, since - SR-IOV VF BARs are all the same size. - - But doing so introduces another problem: total_VFs is usually smaller - than the number of M64 window segments, so if we map one VF BAR directly - to one M64 window, some part of the M64 window will map to another - device's MMIO range. 
- - IODA supports 256 PEs, so segmented windows contain 256 segments, so if - total_VFs is less than 256, we have the situation in Figure 1.0, where - segments [total_VFs, 255] of the M64 window may map to some MMIO range on - other devices:: - - 0 1 total_VFs - 1 - +------+------+- -+------+------+ - | | | ... | | | - +------+------+- -+------+------+ - - VF(n) BAR space - - 0 1 total_VFs - 1 255 - +------+------+- -+------+------+- -+------+------+ - | | | ... | | | ... | | | - +------+------+- -+------+------+- -+------+------+ - - M64 window - - Figure 1.0 Direct map VF(n) BAR space - - Our current solution is to allocate 256 segments even if the VF(n) BAR - space doesn't need that much, as shown in Figure 1.1:: - - 0 1 total_VFs - 1 255 - +------+------+- -+------+------+- -+------+------+ - | | | ... | | | ... | | | - +------+------+- -+------+------+- -+------+------+ - - VF(n) BAR space + extra - - 0 1 total_VFs - 1 255 - +------+------+- -+------+------+- -+------+------+ - | | | ... | | | ... | | | - +------+------+- -+------+------+- -+------+------+ - - M64 window - - Figure 1.1 Map VF(n) BAR space + extra - - Allocating the extra space ensures that the entire M64 window will be - assigned to this one SR-IOV device and none of the space will be - available for other devices. Note that this only expands the space - reserved in software; there are still only total_VFs VFs, and they only - respond to segments [0, total_VFs - 1]. There's nothing in hardware that - responds to segments [total_VFs, 255]. - -4. Implications for the Generic PCI Code -======================================== - -The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be -aligned to the size of an individual VF BAR. - -In IODA2, the MMIO address determines the PE#. If the address is in an M32 -window, we can set the PE# by updating the table that translates segments -to PE#s. Similarly, if the address is in an unsegmented M64 window, we can -set the PE# for the window. 
But if it's in a segmented M64 window, the -segment number is the PE#. - -Therefore, the only way to control the PE# for a VF is to change the base -of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact -amount of space required for the VF(n) BAR space, the VF BAR value is fixed -and cannot be changed. - -On the other hand, if the PCI core allocates additional space, the VF BAR -value can be changed as long as the entire VF(n) BAR space remains inside -the space allocated by the core. - -Ideally the segment size will be the same as an individual VF BAR size. -Then each VF will be in its own PE. The VF BARs (and therefore the PE#s) -are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we -allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0. - -If the segment size is smaller than the VF BAR size, it will take several -segments to cover a VF BAR, and a VF will be in several PEs. This is -possible, but the isolation isn't as good, and it reduces the number of PE# -choices because instead of consuming only numVFs segments, the VF(n) BAR -space will consume (numVFs * n) segments. That means there aren't as many -available segments for adjusting base of the VF(n) BAR space. diff --git a/Documentation/powerpc/pmu-ebb.rst b/Documentation/powerpc/pmu-ebb.rst deleted file mode 100644 index 4f474758eb..0000000000 --- a/Documentation/powerpc/pmu-ebb.rst +++ /dev/null @@ -1,138 +0,0 @@ -======================== -PMU Event Based Branches -======================== - -Event Based Branches (EBBs) are a feature which allows the hardware to -branch directly to a specified user space address when certain events occur. - -The full specification is available in Power ISA v2.07: - - https://www.power.org/documentation/power-isa-version-2-07/ - -One type of event for which EBBs can be configured is PMU exceptions. This -document describes the API for configuring the Power PMU to generate EBBs, -using the Linux perf_events API. 
- - -Terminology ------------ - -Throughout this document we will refer to an "EBB event" or "EBB events". This -just refers to a struct perf_event which has set the "EBB" flag in its -attr.config. All events which can be configured on the hardware PMU are -possible "EBB events". - - -Background ----------- - -When a PMU EBB occurs it is delivered to the currently running process. As such -EBBs can only sensibly be used by programs for self-monitoring. - -It is a feature of the perf_events API that events can be created on other -processes, subject to standard permission checks. This is also true of EBB -events, however unless the target process enables EBBs (via mtspr(BESCR)) no -EBBs will ever be delivered. - -This makes it possible for a process to enable EBBs for itself, but not -actually configure any events. At a later time another process can come along -and attach an EBB event to the process, which will then cause EBBs to be -delivered to the first process. It's not clear if this is actually useful. - - -When the PMU is configured for EBBs, all PMU interrupts are delivered to the -user process. This means once an EBB event is scheduled on the PMU, no non-EBB -events can be configured. This means that EBB events can not be run -concurrently with regular 'perf' commands, or any other perf events. - -It is however safe to run 'perf' commands on a process which is using EBBs. The -kernel will in general schedule the EBB event, and perf will be notified that -its events could not run. - -The exclusion between EBB events and regular events is implemented using the -existing "pinned" and "exclusive" attributes of perf_events. This means EBB -events will be given priority over other events, unless they are also pinned. -If an EBB event and a regular event are both pinned, then whichever is enabled -first will be scheduled and the other will be put in error state. See the -section below titled "Enabling an EBB event" for more information. 
- - -Creating an EBB event ---------------------- - -To request that an event is counted using EBB, the event code should have bit -63 set. - -EBB events must be created with a particular, and restrictive, set of -attributes - this is so that they interoperate correctly with the rest of the -perf_events subsystem. - -An EBB event must be created with the "pinned" and "exclusive" attributes set. -Note that if you are creating a group of EBB events, only the leader can have -these attributes set. - -An EBB event must NOT set any of the "inherit", "sample_period", "freq" or -"enable_on_exec" attributes. - -An EBB event must be attached to a task. This is specified to perf_event_open() -by passing a pid value, typically 0 indicating the current task. - -All events in a group must agree on whether they want EBB. That is all events -must request EBB, or none may request EBB. - -EBB events must specify the PMC they are to be counted on. This ensures -userspace is able to reliably determine which PMC the event is scheduled on. - - -Enabling an EBB event ---------------------- - -Once an EBB event has been successfully opened, it must be enabled with the -perf_events API. This can be achieved either via the ioctl() interface, or the -prctl() interface. - -However, due to the design of the perf_events API, enabling an event does not -guarantee that it has been scheduled on the PMU. To ensure that the EBB event -has been scheduled on the PMU, you must perform a read() on the event. If the -read() returns EOF, then the event has not been scheduled and EBBs are not -enabled. - -This behaviour occurs because the EBB event is pinned and exclusive. When the -EBB event is enabled it will force all other non-pinned events off the PMU. In -this case the enable will be successful. However if there is already an event -pinned on the PMU then the enable will not be successful. - - -Reading an EBB event --------------------- - -It is possible to read() from an EBB event. 
However the results are -meaningless. Because interrupts are being delivered to the user process the -kernel is not able to count the event, and so will return a junk value. - - -Closing an EBB event --------------------- - -When an EBB event is finished with, you can close it using close() as for any -regular event. If this is the last EBB event the PMU will be deconfigured and -no further PMU EBBs will be delivered. - - -EBB Handler ------------ - -The EBB handler is just regular userspace code, however it must be written in -the style of an interrupt handler. When the handler is entered all registers -are live (possibly) and so must be saved somehow before the handler can invoke -other code. - -It's up to the program how to handle this. For C programs a relatively simple -option is to create an interrupt frame on the stack and save registers there. - -Fork ----- - -EBB events are not inherited across fork. If the child process wishes to use -EBBs it should open a new event for itself. Similarly the EBB state in -BESCR/EBBHR/EBBRR is cleared across fork(). diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst deleted file mode 100644 index 5629edf4d5..0000000000 --- a/Documentation/powerpc/ptrace.rst +++ /dev/null @@ -1,157 +0,0 @@ -====== -Ptrace -====== - -GDB intends to support the following hardware debug features of BookE -processors: - -4 hardware breakpoints (IAC) -2 hardware watchpoints (read, write and read-write) (DAC) -2 value conditions for the hardware watchpoints (DVC) - -For that, we need to extend ptrace so that GDB can query and set these -resources. Since we're extending, we're trying to create an interface -that's extendable and that covers both BookE and server processors, so -that GDB doesn't need to special-case each of them. We added the -following 3 new ptrace requests. - -1. PPC_PTRACE_GETHWDBGINFO -============================ - -Query for GDB to discover the hardware debug features. 
The main info to -be returned here is the minimum alignment for the hardware watchpoints. -BookE processors don't have restrictions here, but server processors have -an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid -adding special cases to GDB based on what it sees in AUXV. - -Since we're at it, we added other useful info that the kernel can return to -GDB: this query will return the number of hardware breakpoints, hardware -watchpoints and whether it supports a range of addresses and a condition. -The query will fill the following structure provided by the requesting process:: - - struct ppc_debug_info { - unit32_t version; - unit32_t num_instruction_bps; - unit32_t num_data_bps; - unit32_t num_condition_regs; - unit32_t data_bp_alignment; - unit32_t sizeof_condition; /* size of the DVC register */ - uint64_t features; /* bitmask of the individual flags */ - }; - -features will have bits indicating whether there is support for:: - - #define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1 - #define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2 - #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4 - #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8 - #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 - #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 - -2. 
PPC_PTRACE_SETHWDEBUG - -Sets a hardware breakpoint or watchpoint, according to the provided structure:: - - struct ppc_hw_breakpoint { - uint32_t version; - #define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1 - #define PPC_BREAKPOINT_TRIGGER_READ 0x2 - #define PPC_BREAKPOINT_TRIGGER_WRITE 0x4 - uint32_t trigger_type; /* only some combinations allowed */ - #define PPC_BREAKPOINT_MODE_EXACT 0x0 - #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1 - #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2 - #define PPC_BREAKPOINT_MODE_MASK 0x3 - uint32_t addr_mode; /* address match mode */ - - #define PPC_BREAKPOINT_CONDITION_MODE 0x3 - #define PPC_BREAKPOINT_CONDITION_NONE 0x0 - #define PPC_BREAKPOINT_CONDITION_AND 0x1 - #define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */ - #define PPC_BREAKPOINT_CONDITION_OR 0x2 - #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3 - #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */ - #define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16)) - uint32_t condition_mode; /* break/watchpoint condition flags */ - - uint64_t addr; - uint64_t addr2; - uint64_t condition_value; - }; - -A request specifies one event, not necessarily just one register to be set. -For instance, if the request is for a watchpoint with a condition, both the -DAC and DVC registers will be set in the same request. - -With this GDB can ask for all kinds of hardware breakpoints and watchpoints -that the BookE supports. COMEFROM breakpoints available in server processors -are not contemplated, but that is out of the scope of this work. - -ptrace will return an integer (handle) uniquely identifying the breakpoint or -watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG -request to ask for its removal. Return -ENOSPC if the requested breakpoint -can't be allocated on the registers. 
- -Some examples of using the structure to: - -- set a breakpoint in the first breakpoint register:: - - p.version = PPC_DEBUG_CURRENT_VERSION; - p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE; - p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - p.addr = (uint64_t) address; - p.addr2 = 0; - p.condition_value = 0; - -- set a watchpoint which triggers on reads in the second watchpoint register:: - - p.version = PPC_DEBUG_CURRENT_VERSION; - p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ; - p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - p.addr = (uint64_t) address; - p.addr2 = 0; - p.condition_value = 0; - -- set a watchpoint which triggers only with a specific value:: - - p.version = PPC_DEBUG_CURRENT_VERSION; - p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ; - p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL; - p.addr = (uint64_t) address; - p.addr2 = 0; - p.condition_value = (uint64_t) condition; - -- set a ranged hardware breakpoint:: - - p.version = PPC_DEBUG_CURRENT_VERSION; - p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE; - p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; - p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - p.addr = (uint64_t) begin_range; - p.addr2 = (uint64_t) end_range; - p.condition_value = 0; - -- set a watchpoint in server processors (BookS):: - - p.version = 1; - p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; - p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; - or - p.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - - p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - p.addr = (uint64_t) begin_range; - /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where - * addr2 - addr <= 8 Bytes. - */ - p.addr2 = (uint64_t) end_range; - p.condition_value = 0; - -3. 
PPC_PTRACE_DELHWDEBUG - -Takes an integer which identifies an existing breakpoint or watchpoint -(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the -corresponding breakpoint or watchpoint.. diff --git a/Documentation/powerpc/qe_firmware.rst b/Documentation/powerpc/qe_firmware.rst deleted file mode 100644 index a358f152b7..0000000000 --- a/Documentation/powerpc/qe_firmware.rst +++ /dev/null @@ -1,296 +0,0 @@ -========================================= -Freescale QUICC Engine Firmware Uploading -========================================= - -(c) 2007 Timur Tabi , - Freescale Semiconductor - -.. Table of Contents - - I - Software License for Firmware - - II - Microcode Availability - - III - Description and Terminology - - IV - Microcode Programming Details - - V - Firmware Structure Layout - - VI - Sample Code for Creating Firmware Files - -Revision Information -==================== - -November 30, 2007: Rev 1.0 - Initial version - -I - Software License for Firmware -================================= - -Each firmware file comes with its own software license. For information on -the particular license, please see the license text that is distributed with -the firmware. - -II - Microcode Availability -=========================== - -Firmware files are distributed through various channels. Some are available on -http://opensource.freescale.com. For other firmware files, please contact -your Freescale representative or your operating system vendor. - -III - Description and Terminology -================================= - -In this document, the term 'microcode' refers to the sequence of 32-bit -integers that compose the actual QE microcode. 
- -The term 'firmware' refers to a binary blob that contains the microcode as -well as other data that - - 1) describes the microcode's purpose - 2) describes how and where to upload the microcode - 3) specifies the values of various registers - 4) includes additional data for use by specific device drivers - -Firmware files are binary files that contain only a firmware. - -IV - Microcode Programming Details -=================================== - -The QE architecture allows for only one microcode present in I-RAM for each -RISC processor. To replace any current microcode, a full QE reset (which -disables the microcode) must be performed first. - -QE microcode is uploaded using the following procedure: - -1) The microcode is placed into I-RAM at a specific location, using the - IRAM.IADD and IRAM.IDATA registers. - -2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware - needs split I-RAM. Split I-RAM is only meaningful for SOCs that have - QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM - allows each processor to run a different microcode, effectively creating an - asymmetric multiprocessing (AMP) system. - -3) The TIBCR trap registers are loaded with the addresses of the trap handlers - in the microcode. - -4) The RSP.ECCR register is programmed with the value provided. - -5) If necessary, device drivers that need the virtual traps and extended mode - data will use them. - -Virtual Microcode Traps - -These virtual traps are conditional branches in the microcode. These are -"soft" provisional introduced in the ROMcode in order to enable higher -flexibility and save h/w traps If new features are activated or an issue is -being fixed in the RAM package utilizing they should be activated. This data -structure signals the microcode which of these virtual traps is active. - -This structure contains 6 words that the application should copy to some -specific been defined. 
This table describes the structure:: - - --------------------------------------------------------------- - | Offset in | | Destination Offset | Size of | - | array | Protocol | within PRAM | Operand | - --------------------------------------------------------------| - | 0 | Ethernet | 0xF8 | 4 bytes | - | | interworking | | | - --------------------------------------------------------------- - | 4 | ATM | 0xF8 | 4 bytes | - | | interworking | | | - --------------------------------------------------------------- - | 8 | PPP | 0xF8 | 4 bytes | - | | interworking | | | - --------------------------------------------------------------- - | 12 | Ethernet RX | 0x22 | 1 byte | - | | Distributor Page | | | - --------------------------------------------------------------- - | 16 | ATM Globtal | 0x28 | 1 byte | - | | Params Table | | | - --------------------------------------------------------------- - | 20 | Insert Frame | 0xF8 | 4 bytes | - --------------------------------------------------------------- - - -Extended Modes - -This is a double word bit array (64 bits) that defines special functionality -which has an impact on the software drivers. Each bit has its own impact -and has special instructions for the s/w associated with it. This structure is -described in this table:: - - ----------------------------------------------------------------------- - | Bit # | Name | Description | - ----------------------------------------------------------------------- - | 0 | General | Indicates that prior to each host command | - | | push command | given by the application, the software must | - | | | assert a special host command (push command)| - | | | CECDR = 0x00800000. | - | | | CECR = 0x01c1000f. 
| - ----------------------------------------------------------------------- - | 1 | UCC ATM | Indicates that after issuing ATM RX INIT | - | | RX INIT | command, the host must issue another special| - | | push command | command (push command) and immediately | - | | | following that re-issue the ATM RX INIT | - | | | command. (This makes the sequence of | - | | | initializing the ATM receiver a sequence of | - | | | three host commands) | - | | | CECDR = 0x00800000. | - | | | CECR = 0x01c1000f. | - ----------------------------------------------------------------------- - | 2 | Add/remove | Indicates that following the specific host | - | | command | command: "Add/Remove entry in Hash Lookup | - | | validation | Table" used in Interworking setup, the user | - | | | must issue another command. | - | | | CECDR = 0xce000003. | - | | | CECR = 0x01c10f58. | - ----------------------------------------------------------------------- - | 3 | General push | Indicates that the s/w has to initialize | - | | command | some pointers in the Ethernet thread pages | - | | | which are used when Header Compression is | - | | | activated. The full details of these | - | | | pointers is located in the software drivers.| - ----------------------------------------------------------------------- - | 4 | General push | Indicates that after issuing Ethernet TX | - | | command | INIT command, user must issue this command | - | | | for each SNUM of Ethernet TX thread. | - | | | CECDR = 0x00800003. | - | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, | - | | | 1'b{1}, 12'b{0}, 4'b{1} | - ----------------------------------------------------------------------- - | 5 - 31 | N/A | Reserved, set to zero. | - ----------------------------------------------------------------------- - -V - Firmware Structure Layout -============================== - -QE microcode from Freescale is typically provided as a header file. 
This -header file contains macros that define the microcode binary itself as well as -some other data used in uploading that microcode. The format of these files -do not lend themselves to simple inclusion into other code. Hence, -the need for a more portable format. This section defines that format. - -Instead of distributing a header file, the microcode and related data are -embedded into a binary blob. This blob is passed to the qe_upload_firmware() -function, which parses the blob and performs everything necessary to upload -the microcode. - -All integers are big-endian. See the comments for function -qe_upload_firmware() for up-to-date implementation information. - -This structure supports versioning, where the version of the structure is -embedded into the structure itself. To ensure forward and backwards -compatibility, all versions of the structure must use the same 'qe_header' -structure at the beginning. - -'header' (type: struct qe_header): - The 'length' field is the size, in bytes, of the entire structure, - including all the microcode embedded in it, as well as the CRC (if - present). - - The 'magic' field is an array of three bytes that contains the letters - 'Q', 'E', and 'F'. This is an identifier that indicates that this - structure is a QE Firmware structure. - - The 'version' field is a single byte that indicates the version of this - structure. If the layout of the structure should ever need to be - changed to add support for additional types of microcode, then the - version number should also be changed. - -The 'id' field is a null-terminated string(suitable for printing) that -identifies the firmware. - -The 'count' field indicates the number of 'microcode' structures. There -must be one and only one 'microcode' structure for each RISC processor. -Therefore, this field also represents the number of RISC processors for this -SOC. - -The 'soc' structure contains the SOC numbers and revisions used to match -the microcode to the SOC itself. 
Normally, the microcode loader should -check the data in this structure with the SOC number and revisions, and -only upload the microcode if there's a match. However, this check is not -made on all platforms. - -Although it is not recommended, you can specify '0' in the soc.model -field to skip matching SOCs altogether. - -The 'model' field is a 16-bit number that matches the actual SOC. The -'major' and 'minor' fields are the major and minor revision numbers, -respectively, of the SOC. - -For example, to match the 8323, revision 1.0:: - - soc.model = 8323 - soc.major = 1 - soc.minor = 0 - -'padding' is necessary for structure alignment. This field ensures that the -'extended_modes' field is aligned on a 64-bit boundary. - -'extended_modes' is a bitfield that defines special functionality which has an -impact on the device drivers. Each bit has its own impact and has special -instructions for the driver associated with it. This field is stored in -the QE library and available to any driver that calls qe_get_firmware_info(). - -'vtraps' is an array of 8 words that contain virtual trap values for each -virtual traps. As with 'extended_modes', this field is stored in the QE -library and available to any driver that calls qe_get_firmware_info(). - -'microcode' (type: struct qe_microcode): - For each RISC processor there is one 'microcode' structure. The first - 'microcode' structure is for the first RISC, and so on. - - The 'id' field is a null-terminated string suitable for printing that - identifies this particular microcode. - - 'traps' is an array of 16 words that contain hardware trap values - for each of the 16 traps. If trap[i] is 0, then this particular - trap is to be ignored (i.e. not written to TIBCR[i]). The entire value - is written as-is to the TIBCR[i] register, so be sure to set the EN - and T_IBP bits if necessary. - - 'eccr' is the value to program into the ECCR register. - - 'iram_offset' is the offset into IRAM to start writing the - microcode. 
- - 'count' is the number of 32-bit words in the microcode. - - 'code_offset' is the offset, in bytes, from the beginning of this - structure where the microcode itself can be found. The first - microcode binary should be located immediately after the 'microcode' - array. - - 'major', 'minor', and 'revision' are the major, minor, and revision - version numbers, respectively, of the microcode. If all values are 0, - then these fields are ignored. - - 'reserved' is necessary for structure alignment. Since 'microcode' - is an array, the 64-bit 'extended_modes' field needs to be aligned - on a 64-bit boundary, and this can only happen if the size of - 'microcode' is a multiple of 8 bytes. To ensure that, we add - 'reserved'. - -After the last microcode is a 32-bit CRC. It can be calculated using -this algorithm:: - - u32 crc32(const u8 *p, unsigned int len) - { - unsigned int i; - u32 crc = 0; - - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); - } - return crc; - } - -VI - Sample Code for Creating Firmware Files -============================================ - -A Python program that creates firmware binaries from the header files normally -distributed by Freescale can be found on http://opensource.freescale.com. diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst deleted file mode 100644 index 56490c4c0c..0000000000 --- a/Documentation/powerpc/syscall64-abi.rst +++ /dev/null @@ -1,153 +0,0 @@ -=============================================== -Power Architecture 64-bit Linux system call ABI -=============================================== - -syscall -======= - -Invocation ----------- -The syscall is made with the sc instruction, and returns with execution -continuing at the instruction following the sc instruction. 
- -If PPC_FEATURE2_SCV appears in the AT_HWCAP2 ELF auxiliary vector, the -scv 0 instruction is an alternative that may provide better performance, -with some differences to calling sequence. - -syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI -specification C function calling sequence, including register preservation -rules, with the following differences. - -.. [1] Some syscalls (typically low-level management functions) may have - different calling sequences (e.g., rt_sigreturn). - -Parameters ----------- -The system call number is specified in r0. - -There is a maximum of 6 integer parameters to a syscall, passed in r3-r8. - -Return value ------------- -- For the sc instruction, both a value and an error condition are returned. - cr0.SO is the error condition, and r3 is the return value. When cr0.SO is - clear, the syscall succeeded and r3 is the return value. When cr0.SO is set, - the syscall failed and r3 is the error value (that normally corresponds to - errno). - -- For the scv 0 instruction, the return value indicates failure if it is - -4095..-1 (i.e., it is >= -MAX_ERRNO (-4095) as an unsigned comparison), - in which case the error value is the negated return value. - -Stack ------ -System calls do not modify the caller's stack frame. For example, the caller's -stack frame LR and CR save fields are not used. - -Register preservation rules ---------------------------- -Register preservation rules match the ELF ABI calling sequence with some -differences. - -For the sc instruction, the differences from the ELF ABI are as follows: - -+--------------+--------------------+-----------------------------------------+ -| Register | Preservation Rules | Purpose | -+==============+====================+=========================================+ -| r0 | Volatile | (System call number.) | -+--------------+--------------------+-----------------------------------------+ -| r3 | Volatile | (Parameter 1, and return value.) 
| -+--------------+--------------------+-----------------------------------------+ -| r4-r8 | Volatile | (Parameters 2-6.) | -+--------------+--------------------+-----------------------------------------+ -| cr0 | Volatile | (cr0.SO is the return error condition.) | -+--------------+--------------------+-----------------------------------------+ -| cr1, cr5-7 | Nonvolatile | | -+--------------+--------------------+-----------------------------------------+ -| lr | Nonvolatile | | -+--------------+--------------------+-----------------------------------------+ - -For the scv 0 instruction, the differences from the ELF ABI are as follows: - -+--------------+--------------------+-----------------------------------------+ -| Register | Preservation Rules | Purpose | -+==============+====================+=========================================+ -| r0 | Volatile | (System call number.) | -+--------------+--------------------+-----------------------------------------+ -| r3 | Volatile | (Parameter 1, and return value.) | -+--------------+--------------------+-----------------------------------------+ -| r4-r8 | Volatile | (Parameters 2-6.) | -+--------------+--------------------+-----------------------------------------+ - -All floating point and vector data registers as well as control and status -registers are nonvolatile. - -Transactional Memory --------------------- -Syscall behavior can change if the processor is in transactional or suspended -transaction state, and the syscall can affect the behavior of the transaction. - -If the processor is in suspended state when a syscall is made, the syscall -will be performed as normal, and will return as normal. The syscall will be -performed in suspended state, so its side effects will be persistent according -to the usual transactional memory semantics. A syscall may or may not result -in the transaction being doomed by hardware. 
- -If the processor is in transactional state when a syscall is made, then the -behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF -auxiliary vector. - -- If present, which is the case for newer kernels, then the syscall will not - be performed and the transaction will be doomed by the kernel with the - failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR. - -- If not present (older kernels), then the kernel will suspend the - transactional state and the syscall will proceed as in the case of a - suspended state syscall, and will resume the transactional state before - returning to the caller. This case is not well defined or supported, so this - behavior should not be relied upon. - -scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. - -ptrace ------- -When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains -the system call type that can be used to distinguish between sc and scv 0 -system calls, and the different register conventions can be accounted for. - -If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was -performed with the sc instruction, if it is 0x3000 then the system call was -performed with the scv 0 instruction. - -vsyscall -======== - -vsyscall calling sequence matches the syscall calling sequence, with the -following differences. Some vsyscalls may have different calling sequences. - -Parameters and return value ---------------------------- -r0 is not used as an input. The vsyscall is selected by its address. - -Stack ------ -The vsyscall may or may not use the caller's stack frame save areas. - -Register preservation rules ---------------------------- - -=========== ======== -r0 Volatile -cr1, cr5-7 Volatile -lr Volatile -=========== ======== - -Invocation ----------- -The vsyscall is performed with a branch-with-link instruction to the vsyscall -function address. 
- -Transactional Memory --------------------- -vsyscalls will run in the same transactional state as the caller. A vsyscall -may or may not result in the transaction being doomed by hardware. diff --git a/Documentation/powerpc/transactional_memory.rst b/Documentation/powerpc/transactional_memory.rst deleted file mode 100644 index 040a20675f..0000000000 --- a/Documentation/powerpc/transactional_memory.rst +++ /dev/null @@ -1,274 +0,0 @@ -============================ -Transactional Memory support -============================ - -POWER kernel support for this feature is currently limited to supporting -its use by user programs. It is not currently used by the kernel itself. - -This file aims to sum up how it is supported by Linux and what behaviour you -can expect from your user programs. - - -Basic overview -============== - -Hardware Transactional Memory is supported on POWER8 processors, and is a -feature that enables a different form of atomic memory access. Several new -instructions are presented to delimit transactions; transactions are -guaranteed to either complete atomically or roll back and undo any partial -changes. - -A simple transaction looks like this:: - - begin_move_money: - tbegin - beq abort_handler - - ld r4, SAVINGS_ACCT(r3) - ld r5, CURRENT_ACCT(r3) - subi r5, r5, 1 - addi r4, r4, 1 - std r4, SAVINGS_ACCT(r3) - std r5, CURRENT_ACCT(r3) - - tend - - b continue - - abort_handler: - ... test for odd failures ... - - /* Retry the transaction if it failed because it conflicted with - * someone else: */ - b begin_move_money - - -The 'tbegin' instruction denotes the start point, and 'tend' the end point. -Between these points the processor is in 'Transactional' state; any memory -references will complete in one go if there are no conflicts with other -transactional or non-transactional accesses within the system. 
In this -example, the transaction completes as though it were normal straight-line code -IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an -atomic move of money from the current account to the savings account has been -performed. Even though the normal ld/std instructions are used (note no -lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be -updated, or neither will be updated. - -If, in the meantime, there is a conflict with the locations accessed by the -transaction, the transaction will be aborted by the CPU. Register and memory -state will roll back to that at the 'tbegin', and control will continue from -'tbegin+4'. The branch to abort_handler will be taken this second time; the -abort handler can check the cause of the failure, and retry. - -Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR -and a few other status/flag regs; see the ISA for details. - -Causes of transaction aborts -============================ - -- Conflicts with cache lines used by other processors -- Signals -- Context switches -- See the ISA for full documentation of everything that will abort transactions. - - -Syscalls -======== - -Syscalls made from within an active transaction will not be performed and the -transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL -| TM_CAUSE_PERSISTENT. - -Syscalls made from within a suspended transaction are performed as normal and -the transaction is not explicitly doomed by the kernel. However, what the -kernel does to perform the syscall may result in the transaction being doomed -by the hardware. The syscall is performed in suspended mode so any side -effects will be persistent, independent of transaction success or failure. No -guarantees are provided by the kernel about which syscalls will affect -transaction success. - -Care must be taken when relying on syscalls to abort during active transactions -if the calls are made via a library. 
Libraries may cache values (which may -give the appearance of success) or perform operations that cause transaction -failure before entering the kernel (which may produce different failure codes). -Examples are glibc's getpid() and lazy symbol resolution. - - -Signals -======= - -Delivery of signals (both sync and async) during transactions provides a second -thread state (ucontext/mcontext) to represent the second transactional register -state. Signal delivery 'treclaim's to capture both register states, so signals -abort transactions. The usual ucontext_t passed to the signal handler -represents the checkpointed/original register state; the signal appears to have -arisen at 'tbegin+4'. - -If the sighandler ucontext has uc_link set, a second ucontext has been -delivered. For future compatibility the MSR.TS field should be checked to -determine the transactional state -- if so, the second ucontext in uc->uc_link -represents the active transactional registers at the point of the signal. - -For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS -field shows the transactional mode. - -For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32 -bits are stored in the MSR of the second ucontext, i.e. in -uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional -state TS. - -However, basic signal handlers don't need to be aware of transactions -and simply returning from the handler will deal with things correctly. - -Transaction-aware signal handlers can read the transactional register state -from the second ucontext. This will be necessary for crash handlers to -determine, for example, the address of the instruction causing the SIGSEGV. - -Example signal handler:: - - void crash_handler(int sig, siginfo_t *si, void *uc) - { - ucontext_t *ucp = uc; - ucontext_t *transactional_ucp = ucp->uc_link; - - if (transactional_ucp) { - u64 msr = ucp->uc_mcontext.regs->msr; - /* May have transactional ucontext!
*/ - #ifndef __powerpc64__ - msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32; - #endif - if (MSR_TM_ACTIVE(msr)) { - /* Yes, we crashed during a transaction. Oops. */ - fprintf(stderr, "Transaction to be restarted at 0x%llx, but " - "crashy instruction was at 0x%llx\n", - ucp->uc_mcontext.regs->nip, - transactional_ucp->uc_mcontext.regs->nip); - } - } - - fix_the_problem(ucp->dar); - } - -When in an active transaction that takes a signal, we need to be careful with -the stack. It's possible that the stack has moved back up after the tbegin. -The obvious case here is when the tbegin is called inside a function that -returns before a tend. In this case, the stack is part of the checkpointed -transactional memory state. If we write over this non transactionally or in -suspend, we are in trouble because if we get a tm abort, the program counter and -stack pointer will be back at the tbegin but our in memory stack won't be valid -anymore. - -To avoid this, when taking a signal in an active transaction, we need to use -the stack pointer from the checkpointed state, rather than the speculated -state. This ensures that the signal context (written tm suspended) will be -written below the stack required for the rollback. The transaction is aborted -because of the treclaim, so any memory written between the tbegin and the -signal will be rolled back anyway. - -For signals taken in non-TM or suspended mode, we use the -normal/non-checkpointed stack pointer. - -Any transaction initiated inside a sighandler and suspended on return -from the sighandler to the kernel will get reclaimed and discarded. - -Failure cause codes used by kernel -================================== - -These are defined in <asm/reg.h>, and distinguish different reasons why the -kernel aborted a transaction: - - ====================== ================================ - TM_CAUSE_RESCHED Thread was rescheduled. - TM_CAUSE_TLBI Software TLB invalid. - TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL Syscall from active transaction. - TM_CAUSE_SIGNAL Signal delivered. - TM_CAUSE_MISC Currently unused. - TM_CAUSE_ALIGNMENT Alignment fault. - TM_CAUSE_EMULATE Emulation that touched memory. - ====================== ================================ - -These can be checked by the user program's abort handler as TEXASR[0:7]. If -bit 7 is set, it indicates that the error is considered persistent. For example -a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not. - -GDB -=== - -GDB and ptrace are not currently TM-aware. If one stops during a transaction, -it looks like the transaction has just started (the checkpointed state is -presented). The transaction cannot then be continued and will take the failure -handler route. Furthermore, the transactional 2nd register state will be -inaccessible. GDB can currently be used on programs using TM, but not sensibly -in parts within transactions. - -POWER9 -====== - -TM on POWER9 has issues with storing the complete register state. This -is described in this commit:: - - commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7 - Author: Paul Mackerras - Date: Wed Mar 21 21:32:01 2018 +1100 - KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9 - -To account for this different POWER9 chips have TM enabled in -different ways. - -On POWER9N DD2.01 and below, TM is disabled. ie -HWCAP2[PPC_FEATURE2_HTM] is not set. - -On POWER9N DD2.1 TM is configured by firmware to always abort a -transaction when tm suspend occurs. So tsuspend will cause a -transaction to be aborted and rolled back. Kernel exceptions will also -cause the transaction to be aborted and rolled back and the exception -will not occur. If userspace constructs a sigcontext that enables TM -suspend, the sigcontext will be rejected by the kernel. This mode is -advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set. -HWCAP2[PPC_FEATURE2_HTM] is not set in this mode. 
- -On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as -described in commit 4bb3c7a0208f), hence TM is enabled for guests -ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that -make heavy use of TM suspend (tsuspend or kernel suspend) will result -in traps into the hypervisor and hence will suffer a performance -degradation. Host userspace has TM disabled -ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we may enable it -at some point in the future if we bring the emulation into host -userspace context switching). - -POWER9C DD1.2 and above are only available with POWERVM and hence -Linux only runs as a guest. On these systems TM is emulated like on -POWER9N DD2.2. - -Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and -POWER9C DD1.2. Since earlier POWER9 processors don't support TM -emulation, migration from POWER8 to POWER9 is not supported there. - -Kernel implementation -===================== - -h/rfid mtmsrd quirk -------------------- - -As defined in the ISA, rfid has a quirk which is useful in early -exception handling. When in a userspace transaction and we enter the -kernel via some exception, MSR will end up as TM=0 and TS=01 (ie. TM -off but TM suspended). Regularly the kernel will want to change bits in -the MSR and will perform an rfid to do this. In this case rfid can -have SRR0 TM = 0 and TS = 00 (ie. TM off and non transaction) and the -resulting MSR will retain TM = 0 and TS=01 from before (ie. stay in -suspend). This is a quirk in the architecture as this would normally -be a transition from TS=01 to TS=00 (ie. suspend -> non transactional) -which is an illegal transition. - -This quirk is described in the architecture in the definition of rfid -with these lines: - - if (MSR 29:31 ¬ = 0b010 | SRR1 29:31 ¬ = 0b000) then - MSR 29:31 <- SRR1 29:31 - -hrfid and mtmsrd have the same quirk. - -The Linux kernel uses this quirk in its early exception handling.
diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst deleted file mode 100644 index ba6b1bf1cc..0000000000 --- a/Documentation/powerpc/ultravisor.rst +++ /dev/null @@ -1,1117 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. _ultravisor: - -============================ -Protected Execution Facility -============================ - -.. contents:: - :depth: 3 - -Introduction -############ - - Protected Execution Facility (PEF) is an architectural change for - POWER 9 that enables Secure Virtual Machines (SVMs). DD2.3 chips - (PVR=0x004e1203) or greater will be PEF-capable. A new ISA release - will include the PEF RFC02487 changes. - - When enabled, PEF adds a new higher privileged mode, called Ultravisor - mode, to POWER architecture. Along with the new mode there is new - firmware called the Protected Execution Ultravisor (or Ultravisor - for short). Ultravisor mode is the highest privileged mode in POWER - architecture. - - +------------------+ - | Privilege States | - +==================+ - | Problem | - +------------------+ - | Supervisor | - +------------------+ - | Hypervisor | - +------------------+ - | Ultravisor | - +------------------+ - - PEF protects SVMs from the hypervisor, privileged users, and other - VMs in the system. SVMs are protected while at rest and can only be - executed by an authorized machine. All virtual machines utilize - hypervisor services. The Ultravisor filters calls between the SVMs - and the hypervisor to assure that information does not accidentally - leak. All hypercalls except H_RANDOM are reflected to the hypervisor. - H_RANDOM is not reflected to prevent the hypervisor from influencing - random values in the SVM. - - To support this there is a refactoring of the ownership of resources - in the CPU. Some of the resources which were previously hypervisor - privileged are now ultravisor privileged. 
- -Hardware -======== - - The hardware changes include the following: - - * There is a new bit in the MSR that determines whether the current - process is running in secure mode, MSR(S) bit 41. MSR(S)=1, process - is in secure mode, MSR(S)=0 process is in normal mode. - - * The MSR(S) bit can only be set by the Ultravisor. - - * HRFID cannot be used to set the MSR(S) bit. If the hypervisor needs - to return to a SVM it must use an ultracall. It can determine if - the VM it is returning to is secure. - - * There is a new Ultravisor privileged register, SMFCTRL, which has an - enable/disable bit SMFCTRL(E). - - * The privilege of a process is now determined by three MSR bits, - MSR(S, HV, PR). In each of the tables below the modes are listed - from least privilege to highest privilege. The higher privilege - modes can access all the resources of the lower privilege modes. - - **Secure Mode MSR Settings** - - +---+---+---+---------------+ - | S | HV| PR|Privilege | - +===+===+===+===============+ - | 1 | 0 | 1 | Problem | - +---+---+---+---------------+ - | 1 | 0 | 0 | Privileged(OS)| - +---+---+---+---------------+ - | 1 | 1 | 0 | Ultravisor | - +---+---+---+---------------+ - | 1 | 1 | 1 | Reserved | - +---+---+---+---------------+ - - **Normal Mode MSR Settings** - - +---+---+---+---------------+ - | S | HV| PR|Privilege | - +===+===+===+===============+ - | 0 | 0 | 1 | Problem | - +---+---+---+---------------+ - | 0 | 0 | 0 | Privileged(OS)| - +---+---+---+---------------+ - | 0 | 1 | 0 | Hypervisor | - +---+---+---+---------------+ - | 0 | 1 | 1 | Problem (Host)| - +---+---+---+---------------+ - - * Memory is partitioned into secure and normal memory. Only processes - that are running in secure mode can access secure memory. - - * The hardware does not allow anything that is not running secure to - access secure memory. This means that the Hypervisor cannot access - the memory of the SVM without using an ultracall (asking the - Ultravisor).
The Ultravisor will only allow the hypervisor to see - the SVM memory encrypted. - - * I/O systems are not allowed to directly address secure memory. This - limits the SVMs to virtual I/O only. - - * The architecture allows the SVM to share pages of memory with the - hypervisor that are not protected with encryption. However, this - sharing must be initiated by the SVM. - - * When a process is running in secure mode all hypercalls - (syscall lev=1) go to the Ultravisor. - - * When a process is in secure mode all interrupts go to the - Ultravisor. - - * The following resources have become Ultravisor privileged and - require an Ultravisor interface to manipulate: - - * Processor configurations registers (SCOMs). - - * Stop state information. - - * The debug registers CIABR, DAWR, and DAWRX when SMFCTRL(D) is set. - If SMFCTRL(D) is not set they do not work in secure mode. When set, - reading and writing requires an Ultravisor call, otherwise that - will cause a Hypervisor Emulation Assistance interrupt. - - * PTCR and partition table entries (partition table is in secure - memory). An attempt to write to PTCR will cause a Hypervisor - Emulation Assistance interrupt. - - * LDBAR (LD Base Address Register) and IMC (In-Memory Collection) - non-architected registers. An attempt to write to them will cause a - Hypervisor Emulation Assistance interrupt. - - * Paging for an SVM, sharing of memory with Hypervisor for an SVM. - (Including Virtual Processor Area (VPA) and virtual I/O). - - -Software/Microcode -================== - - The software changes include: - - * SVMs are created from normal VM using (open source) tooling supplied - by IBM. - - * All SVMs start as normal VMs and utilize an ultracall, UV_ESM - (Enter Secure Mode), to make the transition. - - * When the UV_ESM ultracall is made the Ultravisor copies the VM into - secure memory, decrypts the verification information, and checks the - integrity of the SVM.
If the integrity check passes the Ultravisor - passes control in secure mode. - - * The verification information includes the pass phrase for the - encrypted disk associated with the SVM. This pass phrase is given - to the SVM when requested. - - * The Ultravisor is not involved in protecting the encrypted disk of - the SVM while at rest. - - * For external interrupts the Ultravisor saves the state of the SVM, - and reflects the interrupt to the hypervisor for processing. - For hypercalls, the Ultravisor inserts neutral state into all - registers not needed for the hypercall then reflects the call to - the hypervisor for processing. The H_RANDOM hypercall is performed - by the Ultravisor and not reflected. - - * For virtual I/O to work bounce buffering must be done. - - * The Ultravisor uses AES (IAPM) for protection of SVM memory. IAPM - is a mode of AES that provides integrity and secrecy concurrently. - - * The movement of data between normal and secure pages is coordinated - with the Ultravisor by a new HMM plug-in in the Hypervisor. - - The Ultravisor offers new services to the hypervisor and SVMs. These - are accessed through ultracalls. - -Terminology -=========== - - * Hypercalls: special system calls used to request services from - Hypervisor. - - * Normal memory: Memory that is accessible to Hypervisor. - - * Normal page: Page backed by normal memory and available to - Hypervisor. - - * Shared page: A page backed by normal memory and available to both - the Hypervisor/QEMU and the SVM (i.e page has mappings in SVM and - Hypervisor/QEMU). - - * Secure memory: Memory that is accessible only to Ultravisor and - SVMs. - - * Secure page: Page backed by secure memory and only available to - Ultravisor and SVM. - - * SVM: Secure Virtual Machine. - - * Ultracalls: special system calls used to request services from - Ultravisor. 
- - -Ultravisor calls API -#################### - - This section describes Ultravisor calls (ultracalls) needed to - support Secure Virtual Machines (SVM)s and Paravirtualized KVM. The - ultracalls allow the SVMs and Hypervisor to request services from the - Ultravisor such as accessing a register or memory region that can only - be accessed when running in Ultravisor-privileged mode. - - The specific service needed from an ultracall is specified in register - R3 (the first parameter to the ultracall). Other parameters to the - ultracall, if any, are specified in registers R4 through R12. - - Return value of all ultracalls is in register R3. Other output values - from the ultracall, if any, are returned in registers R4 through R12. - The only exception to this register usage is the ``UV_RETURN`` - ultracall described below. - - Each ultracall returns specific error codes, applicable in the context - of the ultracall. However, like with the PowerPC Architecture Platform - Reference (PAPR), if no specific error code is defined for a - particular situation, then the ultracall will fallback to an erroneous - parameter-position based code. i.e U_PARAMETER, U_P2, U_P3 etc - depending on the ultracall parameter that may have caused the error. - - Some ultracalls involve transferring a page of data between Ultravisor - and Hypervisor. Secure pages that are transferred from secure memory - to normal memory may be encrypted using dynamically generated keys. - When the secure pages are transferred back to secure memory, they may - be decrypted using the same dynamically generated keys. Generation and - management of these keys will be covered in a separate document. - - For now this only covers ultracalls currently implemented and being - used by Hypervisor and SVMs but others can be added here when it - makes sense. - - The full specification for all hypercalls/ultracalls will eventually - be made available in the public/OpenPower version of the PAPR - specification. - - .. 
note:: - - If PEF is not enabled, the ultracalls will be redirected to the - Hypervisor which must handle/fail the calls. - -Ultracalls used by Hypervisor -============================= - - This section describes the virtual memory management ultracalls used - by the Hypervisor to manage SVMs. - -UV_PAGE_OUT ------------ - - Encrypt and move the contents of a page from secure memory to normal - memory. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_PAGE_OUT, - uint16_t lpid, /* LPAR ID */ - uint64_t dest_ra, /* real address of destination page */ - uint64_t src_gpa, /* source guest-physical-address */ - uint8_t flags, /* flags */ - uint64_t order) /* page size order */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``dest_ra`` is invalid. - * U_P3 if the ``src_gpa`` address is invalid. - * U_P4 if any bit in the ``flags`` is unrecognized - * U_P5 if the ``order`` parameter is unsupported. - * U_FUNCTION if functionality is not supported. - * U_BUSY if page cannot be currently paged-out. - -Description -~~~~~~~~~~~ - - Encrypt the contents of a secure-page and make it available to - Hypervisor in a normal page. - - By default, the source page is unmapped from the SVM's partition- - scoped page table. But the Hypervisor can provide a hint to the - Ultravisor to retain the page mapping by setting the ``UV_SNAPSHOT`` - flag in ``flags`` parameter. - - If the source page is already a shared page the call returns - U_SUCCESS, without doing anything. - -Use cases -~~~~~~~~~ - - #. QEMU attempts to access an address belonging to the SVM but the - page frame for that address is not mapped into QEMU's address - space. In this case, the Hypervisor will allocate a page frame, - map it into QEMU's address space and issue the ``UV_PAGE_OUT`` - call to retrieve the encrypted contents of the page. - - #. 
When Ultravisor runs low on secure memory and it needs to page-out - an LRU page. In this case, Ultravisor will issue the - ``H_SVM_PAGE_OUT`` hypercall to the Hypervisor. The Hypervisor will - then allocate a normal page and issue the ``UV_PAGE_OUT`` ultracall - and the Ultravisor will encrypt and move the contents of the secure - page into the normal page. - - #. When Hypervisor accesses SVM data, the Hypervisor requests the - Ultravisor to transfer the corresponding page into an insecure page, - which the Hypervisor can access. The data in the normal page will - be encrypted though. - -UV_PAGE_IN ----------- - - Move the contents of a page from normal memory to secure memory. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_PAGE_IN, - uint16_t lpid, /* the LPAR ID */ - uint64_t src_ra, /* source real address of page */ - uint64_t dest_gpa, /* destination guest physical address */ - uint64_t flags, /* flags */ - uint64_t order) /* page size order */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_BUSY if page cannot be currently paged-in. - * U_FUNCTION if functionality is not supported - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``src_ra`` is invalid. - * U_P3 if the ``dest_gpa`` address is invalid. - * U_P4 if any bit in the ``flags`` is unrecognized - * U_P5 if the ``order`` parameter is unsupported. - -Description -~~~~~~~~~~~ - - Move the contents of the page identified by ``src_ra`` from normal - memory to secure memory and map it to the guest physical address - ``dest_gpa``. - - If `dest_gpa` refers to a shared address, map the page into the - partition-scoped page-table of the SVM. If `dest_gpa` is not shared, - copy the contents of the page into the corresponding secure page. - Depending on the context, decrypt the page before being copied. - - The caller provides the attributes of the page through the ``flags`` - parameter.
Valid values for ``flags`` are: - - * CACHE_INHIBITED - * CACHE_ENABLED - * WRITE_PROTECTION - - The Hypervisor must pin the page in memory before making - ``UV_PAGE_IN`` ultracall. - -Use cases -~~~~~~~~~ - - #. When a normal VM switches to secure mode, all its pages residing - in normal memory, are moved into secure memory. - - #. When an SVM requests to share a page with Hypervisor the Hypervisor - allocates a page and informs the Ultravisor. - - #. When an SVM accesses a secure page that has been paged-out, - Ultravisor invokes the Hypervisor to locate the page. After - locating the page, the Hypervisor uses UV_PAGE_IN to make the - page available to Ultravisor. - -UV_PAGE_INVAL -------------- - - Invalidate the Ultravisor mapping of a page. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_PAGE_INVAL, - uint16_t lpid, /* the LPAR ID */ - uint64_t guest_pa, /* destination guest-physical-address */ - uint64_t order) /* page size order */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``guest_pa`` is invalid (or corresponds to a secure - page mapping). - * U_P3 if the ``order`` is invalid. - * U_FUNCTION if functionality is not supported. - * U_BUSY if page cannot be currently invalidated. - -Description -~~~~~~~~~~~ - - This ultracall informs Ultravisor that the page mapping in Hypervisor - corresponding to the given guest physical address has been invalidated - and that the Ultravisor should not access the page. If the specified - ``guest_pa`` corresponds to a secure page, Ultravisor will ignore the - attempt to invalidate the page and return U_P2. - -Use cases -~~~~~~~~~ - - #. When a shared page is unmapped from the QEMU's page table, possibly - because it is paged-out to disk, Ultravisor needs to know that the - page should not be accessed from its side too. 
- - -UV_WRITE_PATE -------------- - - Validate and write the partition table entry (PATE) for a given - partition. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_WRITE_PATE, - uint32_t lpid, /* the LPAR ID */ - uint64_t dw0, /* the first double word to write */ - uint64_t dw1) /* the second double word to write */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_BUSY if PATE cannot be currently written to. - * U_FUNCTION if functionality is not supported. - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``dw0`` is invalid. - * U_P3 if the ``dw1`` address is invalid. - * U_PERMISSION if the Hypervisor is attempting to change the PATE - of a secure virtual machine or if called from a - context other than Hypervisor. - -Description -~~~~~~~~~~~ - - Validate and write a LPID and its partition-table-entry for the given - LPID. If the LPID is already allocated and initialized, this call - results in changing the partition table entry. - -Use cases -~~~~~~~~~ - - #. The Partition table resides in Secure memory and its entries, - called PATE (Partition Table Entries), point to the partition- - scoped page tables for the Hypervisor as well as each of the - virtual machines (both secure and normal). The Hypervisor - operates in partition 0 and its partition-scoped page tables - reside in normal memory. - - #. This ultracall allows the Hypervisor to register the partition- - scoped and process-scoped page table entries for the Hypervisor - and other partitions (virtual machines) with the Ultravisor. - - #. If the value of the PATE for an existing partition (VM) changes, - the TLB cache for the partition is flushed. - - #. The Hypervisor is responsible for allocating LPID. The LPID and - its PATE entry are registered together. The Hypervisor manages - the PATE entries for a normal VM and can change the PATE entry - anytime.
Ultravisor manages the PATE entries for an SVM and - Hypervisor is not allowed to modify them. - -UV_RETURN ---------- - - Return control from the Hypervisor back to the Ultravisor after - processing an hypercall or interrupt that was forwarded (aka - *reflected*) to the Hypervisor. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_RETURN) - -Return values -~~~~~~~~~~~~~ - - This call never returns to Hypervisor on success. It returns - U_INVALID if ultracall is not made from a Hypervisor context. - -Description -~~~~~~~~~~~ - - When an SVM makes an hypercall or incurs some other exception, the - Ultravisor usually forwards (aka *reflects*) the exceptions to the - Hypervisor. After processing the exception, Hypervisor uses the - ``UV_RETURN`` ultracall to return control back to the SVM. - - The expected register state on entry to this ultracall is: - - * Non-volatile registers are restored to their original values. - * If returning from an hypercall, register R0 contains the return - value (**unlike other ultracalls**) and, registers R4 through R12 - contain any output values of the hypercall. - * R3 contains the ultracall number, i.e UV_RETURN. - * If returning with a synthesized interrupt, R2 contains the - synthesized interrupt number. - -Use cases -~~~~~~~~~ - - #. Ultravisor relies on the Hypervisor to provide several services to - the SVM such as processing hypercall and other exceptions. After - processing the exception, Hypervisor uses UV_RETURN to return - control back to the Ultravisor. - - #. Hypervisor has to use this ultracall to return control to the SVM. - - -UV_REGISTER_MEM_SLOT --------------------- - - Register an SVM address-range with specified properties. - -Syntax -~~~~~~ - -.. 
code-block:: c - - uint64_t ultracall(const uint64_t UV_REGISTER_MEM_SLOT, - uint64_t lpid, /* LPAR ID of the SVM */ - uint64_t start_gpa, /* start guest physical address */ - uint64_t size, /* size of address range in bytes */ - uint64_t flags, /* reserved for future expansion */ - uint16_t slotid) /* slot identifier */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``start_gpa`` is invalid. - * U_P3 if ``size`` is invalid. - * U_P4 if any bit in the ``flags`` is unrecognized. - * U_P5 if the ``slotid`` parameter is unsupported. - * U_PERMISSION if called from context other than Hypervisor. - * U_FUNCTION if functionality is not supported. - - -Description -~~~~~~~~~~~ - - Register a memory range for an SVM. The memory range starts at the - guest physical address ``start_gpa`` and is ``size`` bytes long. - -Use cases -~~~~~~~~~ - - - #. When a virtual machine goes secure, all the memory slots managed by - the Hypervisor move into secure memory. The Hypervisor iterates - through each of memory slots, and registers the slot with - Ultravisor. Hypervisor may discard some slots such as those used - for firmware (SLOF). - - #. When new memory is hot-plugged, a new memory slot gets registered. - - -UV_UNREGISTER_MEM_SLOT ----------------------- - - Unregister an SVM address-range that was previously registered using - UV_REGISTER_MEM_SLOT. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_UNREGISTER_MEM_SLOT, - uint64_t lpid, /* LPAR ID of the SVM */ - uint64_t slotid) /* reservation slotid */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_FUNCTION if functionality is not supported. - * U_PARAMETER if ``lpid`` is invalid. - * U_P2 if ``slotid`` is invalid. - * U_PERMISSION if called from context other than Hypervisor.
- -Description -~~~~~~~~~~~ - - Release the memory slot identified by ``slotid`` and free any - resources allocated towards the reservation. - -Use cases -~~~~~~~~~ - - #. Memory hot-remove. - - -UV_SVM_TERMINATE ----------------- - - Terminate an SVM and release its resources. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_SVM_TERMINATE, - uint64_t lpid) /* LPAR ID of the SVM */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_FUNCTION if functionality is not supported. - * U_PARAMETER if ``lpid`` is invalid. - * U_INVALID if VM is not secure. - * U_PERMISSION if not called from a Hypervisor context. - -Description -~~~~~~~~~~~ - - Terminate an SVM and release all its resources. - -Use cases -~~~~~~~~~ - - #. Called by Hypervisor when terminating an SVM. - - -Ultracalls used by SVM -====================== - -UV_SHARE_PAGE -------------- - - Share a set of guest physical pages with the Hypervisor. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_SHARE_PAGE, - uint64_t gfn, /* guest page frame number */ - uint64_t num) /* number of pages of size PAGE_SIZE */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_FUNCTION if functionality is not supported. - * U_INVALID if the VM is not secure. - * U_PARAMETER if ``gfn`` is invalid. - * U_P2 if ``num`` is invalid. - -Description -~~~~~~~~~~~ - - Share the ``num`` pages starting at guest physical frame number ``gfn`` - with the Hypervisor. Assume page size is PAGE_SIZE bytes. Zero the - pages before returning. - - If the address is already backed by a secure page, unmap the page and - back it with an insecure page, with the help of the Hypervisor. If it - is not backed by any page yet, mark the PTE as insecure and back it - with an insecure page when the address is accessed. If it is already - backed by an insecure page, zero the page and return.
- -Use cases -~~~~~~~~~ - - #. The Hypervisor cannot access the SVM pages since they are backed by - secure pages. Hence an SVM must explicitly request Ultravisor for - pages it can share with Hypervisor. - - #. Shared pages are needed to support virtio and Virtual Processor Area - (VPA) in SVMs. - - -UV_UNSHARE_PAGE ---------------- - - Restore a shared SVM page to its initial state. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_UNSHARE_PAGE, - uint64_t gfn, /* guest page frame number */ - uint64_t num) /* number of pages of size PAGE_SIZE*/ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_FUNCTION if functionality is not supported. - * U_INVALID if VM is not secure. - * U_PARAMETER if ``gfn`` is invalid. - * U_P2 if ``num`` is invalid. - -Description -~~~~~~~~~~~ - - Stop sharing ``num`` pages starting at ``gfn`` with the Hypervisor. - Assume that the page size is PAGE_SIZE. Zero the pages before - returning. - - If the address is already backed by an insecure page, unmap the page - and back it with a secure page. Inform the Hypervisor to release - reference to its shared page. If the address is not backed by a page - yet, mark the PTE as secure and back it with a secure page when that - address is accessed. If it is already backed by an secure page zero - the page and return. - -Use cases -~~~~~~~~~ - - #. The SVM may decide to unshare a page from the Hypervisor. - - -UV_UNSHARE_ALL_PAGES --------------------- - - Unshare all pages the SVM has shared with Hypervisor. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_UNSHARE_ALL_PAGES) - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success. - * U_FUNCTION if functionality is not supported. - * U_INVAL if VM is not secure. - -Description -~~~~~~~~~~~ - - Unshare all shared pages from the Hypervisor. All unshared pages are - zeroed on return.
Only pages explicitly shared by the SVM with the - Hypervisor (using UV_SHARE_PAGE ultracall) are unshared. Ultravisor - may internally share some pages with the Hypervisor without explicit - request from the SVM. These pages will not be unshared by this - ultracall. - -Use cases -~~~~~~~~~ - - #. This call is needed when ``kexec`` is used to boot a different - kernel. It may also be needed during SVM reset. - -UV_ESM ------- - - Secure the virtual machine (*enter secure mode*). - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t ultracall(const uint64_t UV_ESM, - uint64_t esm_blob_addr, /* location of the ESM blob */ - unint64_t fdt) /* Flattened device tree */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * U_SUCCESS on success (including if VM is already secure). - * U_FUNCTION if functionality is not supported. - * U_INVALID if VM is not secure. - * U_PARAMETER if ``esm_blob_addr`` is invalid. - * U_P2 if ``fdt`` is invalid. - * U_PERMISSION if any integrity checks fail. - * U_RETRY insufficient memory to create SVM. - * U_NO_KEY symmetric key unavailable. - -Description -~~~~~~~~~~~ - - Secure the virtual machine. On successful completion, return - control to the virtual machine at the address specified in the - ESM blob. - -Use cases -~~~~~~~~~ - - #. A normal virtual machine can choose to switch to a secure mode. - -Hypervisor Calls API -#################### - - This document describes the Hypervisor calls (hypercalls) that are - needed to support the Ultravisor. Hypercalls are services provided by - the Hypervisor to virtual machines and Ultravisor. - - Register usage for these hypercalls is identical to that of the other - hypercalls defined in the Power Architecture Platform Reference (PAPR) - document. i.e on input, register R3 identifies the specific service - that is being requested and registers R4 through R11 contain - additional parameters to the hypercall, if any. 
On output, register - R3 contains the return value and registers R4 through R9 contain any - other output values from the hypercall. - - This document only covers hypercalls currently implemented/planned - for Ultravisor usage but others can be added here when it makes sense. - - The full specification for all hypercalls/ultracalls will eventually - be made available in the public/OpenPower version of the PAPR - specification. - -Hypervisor calls to support Ultravisor -====================================== - - Following are the set of hypercalls needed to support Ultravisor. - -H_SVM_INIT_START ----------------- - - Begin the process of converting a normal virtual machine into an SVM. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t hypercall(const uint64_t H_SVM_INIT_START) - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * H_SUCCESS on success. - * H_STATE if the VM is not in a position to switch to secure. - -Description -~~~~~~~~~~~ - - Initiate the process of securing a virtual machine. This involves - coordinating with the Ultravisor, using ultracalls, to allocate - resources in the Ultravisor for the new SVM, transferring the VM's - pages from normal to secure memory etc. When the process is - completed, Ultravisor issues the H_SVM_INIT_DONE hypercall. - -Use cases -~~~~~~~~~ - - #. Ultravisor uses this hypercall to inform Hypervisor that a VM - has initiated the process of switching to secure mode. - - -H_SVM_INIT_DONE ---------------- - - Complete the process of securing an SVM. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t hypercall(const uint64_t H_SVM_INIT_DONE) - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * H_SUCCESS on success. - * H_UNSUPPORTED if called from the wrong context (e.g. - from an SVM or before an H_SVM_INIT_START - hypercall). - * H_STATE if the hypervisor could not successfully - transition the VM to Secure VM. 
- -Description -~~~~~~~~~~~ - - Complete the process of securing a virtual machine. This call must - be made after a prior call to ``H_SVM_INIT_START`` hypercall. - -Use cases -~~~~~~~~~ - - On successfully securing a virtual machine, the Ultravisor informs - Hypervisor about it. Hypervisor can use this call to finish setting - up its internal state for this virtual machine. - - -H_SVM_INIT_ABORT ----------------- - - Abort the process of securing an SVM. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t hypercall(const uint64_t H_SVM_INIT_ABORT) - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * H_PARAMETER on successfully cleaning up the state, - Hypervisor will return this value to the - **guest**, to indicate that the underlying - UV_ESM ultracall failed. - - * H_STATE if called after a VM has gone secure (i.e - H_SVM_INIT_DONE hypercall was successful). - - * H_UNSUPPORTED if called from a wrong context (e.g. from a - normal VM). - -Description -~~~~~~~~~~~ - - Abort the process of securing a virtual machine. This call must - be made after a prior call to ``H_SVM_INIT_START`` hypercall and - before a call to ``H_SVM_INIT_DONE``. - - On entry into this hypercall the non-volatile GPRs and FPRs are - expected to contain the values they had at the time the VM issued - the UV_ESM ultracall. Further ``SRR0`` is expected to contain the - address of the instruction after the ``UV_ESM`` ultracall and ``SRR1`` - the MSR value with which to return to the VM. - - This hypercall will cleanup any partial state that was established for - the VM since the prior ``H_SVM_INIT_START`` hypercall, including paging - out pages that were paged-into secure memory, and issue the - ``UV_SVM_TERMINATE`` ultracall to terminate the VM. - - After the partial state is cleaned up, control returns to the VM - (**not Ultravisor**), at the address specified in ``SRR0`` with the - MSR values set to the value in ``SRR1``. 
- -Use cases -~~~~~~~~~ - - If after a successful call to ``H_SVM_INIT_START``, the Ultravisor - encounters an error while securing a virtual machine, either due - to lack of resources or because the VM's security information could - not be validated, Ultravisor informs the Hypervisor about it. - Hypervisor should use this call to clean up any internal state for - this virtual machine and return to the VM. - -H_SVM_PAGE_IN -------------- - - Move the contents of a page from normal memory to secure memory. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t hypercall(const uint64_t H_SVM_PAGE_IN, - uint64_t guest_pa, /* guest-physical-address */ - uint64_t flags, /* flags */ - uint64_t order) /* page size order */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * H_SUCCESS on success. - * H_PARAMETER if ``guest_pa`` is invalid. - * H_P2 if ``flags`` is invalid. - * H_P3 if ``order`` of page is invalid. - -Description -~~~~~~~~~~~ - - Retrieve the content of the page, belonging to the VM at the specified - guest physical address. - - Only valid value(s) in ``flags`` are: - - * H_PAGE_IN_SHARED which indicates that the page is to be shared - with the Ultravisor. - - * H_PAGE_IN_NONSHARED indicates that the UV is not anymore - interested in the page. Applicable if the page is a shared page. - - The ``order`` parameter must correspond to the configured page size. - -Use cases -~~~~~~~~~ - - #. When a normal VM becomes a secure VM (using the UV_ESM ultracall), - the Ultravisor uses this hypercall to move contents of each page of - the VM from normal memory to secure memory. - - #. Ultravisor uses this hypercall to ask Hypervisor to provide a page - in normal memory that can be shared between the SVM and Hypervisor. - - #. Ultravisor uses this hypercall to page-in a paged-out page. This - can happen when the SVM touches a paged-out page. - - #. If SVM wants to disable sharing of pages with Hypervisor, it can - inform Ultravisor to do so. 
Ultravisor will then use this hypercall - and inform Hypervisor that it has released access to the normal - page. - -H_SVM_PAGE_OUT ---------------- - - Move the contents of the page to normal memory. - -Syntax -~~~~~~ - -.. code-block:: c - - uint64_t hypercall(const uint64_t H_SVM_PAGE_OUT, - uint64_t guest_pa, /* guest-physical-address */ - uint64_t flags, /* flags (currently none) */ - uint64_t order) /* page size order */ - -Return values -~~~~~~~~~~~~~ - - One of the following values: - - * H_SUCCESS on success. - * H_PARAMETER if ``guest_pa`` is invalid. - * H_P2 if ``flags`` is invalid. - * H_P3 if ``order`` is invalid. - -Description -~~~~~~~~~~~ - - Move the contents of the page identified by ``guest_pa`` to normal - memory. - - Currently ``flags`` is unused and must be set to 0. The ``order`` - parameter must correspond to the configured page size. - -Use cases -~~~~~~~~~ - - #. If Ultravisor is running low on secure pages, it can move the - contents of some secure pages, into normal pages using this - hypercall. The content will be encrypted. - -References -########## - -- `Supporting Protected Computing on IBM Power Architecture `_ diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst deleted file mode 100644 index a9625a2fa0..0000000000 --- a/Documentation/powerpc/vas-api.rst +++ /dev/null @@ -1,305 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. _VAS-API: - -=================================================== -Virtual Accelerator Switchboard (VAS) userspace API -=================================================== - -Introduction -============ - -Power9 processor introduced Virtual Accelerator Switchboard (VAS) which -allows both userspace and kernel communicate to co-processor -(hardware accelerator) referred to as the Nest Accelerator (NX). The NX -unit comprises of one or more hardware engines or co-processor types -such as 842 compression, GZIP compression and encryption. 
On power9, -userspace applications will have access to only GZIP Compression engine -which supports ZLIB and GZIP compression algorithms in the hardware. - -To communicate with NX, kernel has to establish a channel or window and -then requests can be submitted directly without kernel involvement. -Requests to the GZIP engine must be formatted as a co-processor Request -Block (CRB) and these CRBs must be submitted to the NX using COPY/PASTE -instructions to paste the CRB to hardware address that is associated with -the engine's request queue. - -The GZIP engine provides two priority levels of requests: Normal and -High. Only Normal requests are supported from userspace right now. - -This document explains userspace API that is used to interact with -kernel to setup channel / window which can be used to send compression -requests directly to NX accelerator. - - -Overview -======== - -Application access to the GZIP engine is provided through -/dev/crypto/nx-gzip device node implemented by the VAS/NX device driver. -An application must open the /dev/crypto/nx-gzip device to obtain a file -descriptor (fd). Then should issue VAS_TX_WIN_OPEN ioctl with this fd to -establish connection to the engine. It means send window is opened on GZIP -engine for this process. Once a connection is established, the application -should use the mmap() system call to map the hardware address of engine's -request queue into the application's virtual address space. - -The application can then submit one or more requests to the engine by -using copy/paste instructions and pasting the CRBs to the virtual address -(aka paste_address) returned by mmap(). User space can close the -established connection or send window by closing the file descriptor -(close(fd)) or upon the process exit. - -Note that applications can send several requests with the same window or -can establish multiple windows, but one window for each file descriptor. 
- -Following sections provide additional details and references about the -individual steps. - -NX-GZIP Device Node -=================== - -There is one /dev/crypto/nx-gzip node in the system and it provides -access to all GZIP engines in the system. The only valid operations on -/dev/crypto/nx-gzip are: - - * open() the device for read and write. - * issue VAS_TX_WIN_OPEN ioctl - * mmap() the engine's request queue into application's virtual - address space (i.e. get a paste_address for the co-processor - engine). - * close the device node. - -Other file operations on this device node are undefined. - -Note that the copy and paste operations go directly to the hardware and -do not go through this device. Refer COPY/PASTE document for more -details. - -Although a system may have several instances of the NX co-processor -engines (typically, one per P9 chip) there is just one -/dev/crypto/nx-gzip device node in the system. When the nx-gzip device -node is opened, Kernel opens send window on a suitable instance of NX -accelerator. It finds CPU on which the user process is executing and -determine the NX instance for the corresponding chip on which this CPU -belongs. - -Applications may chose a specific instance of the NX co-processor using -the vas_id field in the VAS_TX_WIN_OPEN ioctl as detailed below. - -A userspace library libnxz is available here but still in development: - - https://github.com/abalib/power-gzip - -Applications that use inflate / deflate calls can link with libnxz -instead of libz and use NX GZIP compression without any modification. - -Open /dev/crypto/nx-gzip -======================== - -The nx-gzip device should be opened for read and write. No special -privileges are needed to open the device. Each window corresponds to one -file descriptor. So if the userspace process needs multiple windows, -several open calls have to be issued. - -See open(2) system call man pages for other details such as return values, -error codes and restrictions. 
- -VAS_TX_WIN_OPEN ioctl -===================== - -Applications should use the VAS_TX_WIN_OPEN ioctl as follows to establish -a connection with NX co-processor engine: - - :: - - struct vas_tx_win_open_attr { - __u32 version; - __s16 vas_id; /* specific instance of vas or -1 - for default */ - __u16 reserved1; - __u64 flags; /* For future use */ - __u64 reserved2[6]; - }; - - version: - The version field must be currently set to 1. - vas_id: - If '-1' is passed, kernel will make a best-effort attempt - to assign an optimal instance of NX for the process. To - select the specific VAS instance, refer - "Discovery of available VAS engines" section below. - - flags, reserved1 and reserved2[6] fields are for future extension - and must be set to 0. - - The attributes attr for the VAS_TX_WIN_OPEN ioctl are defined as - follows:: - - #define VAS_MAGIC 'v' - #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 1, - struct vas_tx_win_open_attr) - - struct vas_tx_win_open_attr attr; - rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); - - The VAS_TX_WIN_OPEN ioctl returns 0 on success. On errors, it - returns -1 and sets the errno variable to indicate the error. - - Error conditions: - - ====== ================================================ - EINVAL fd does not refer to a valid VAS device. - EINVAL Invalid vas ID - EINVAL version is not set with proper value - EEXIST Window is already opened for the given fd - ENOMEM Memory is not available to allocate window - ENOSPC System has too many active windows (connections) - opened - EINVAL reserved fields are not set to 0. - ====== ================================================ - - See the ioctl(2) man page for more details, error codes and - restrictions. - -mmap() NX-GZIP device -===================== - -The mmap() system call for a NX-GZIP device fd returns a paste_address -that the application can use to copy/paste its CRB to the hardware engines. 
- - :: - - paste_addr = mmap(addr, size, prot, flags, fd, offset); - - Only restrictions on mmap for a NX-GZIP device fd are: - - * size should be PAGE_SIZE - * offset parameter should be 0ULL - - Refer to mmap(2) man page for additional details/restrictions. - In addition to the error conditions listed on the mmap(2) man - page, can also fail with one of the following error codes: - - ====== ============================================= - EINVAL fd is not associated with an open window - (i.e mmap() does not follow a successful call - to the VAS_TX_WIN_OPEN ioctl). - EINVAL offset field is not 0ULL. - ====== ============================================= - -Discovery of available VAS engines -================================== - -Each available VAS instance in the system will have a device tree node -like /proc/device-tree/vas@* or /proc/device-tree/xscom@*/vas@*. -Determine the chip or VAS instance and use the corresponding ibm,vas-id -property value in this node to select specific VAS instance. - -Copy/Paste operations -===================== - -Applications should use the copy and paste instructions to send CRB to NX. -Refer section 4.4 in PowerISA for Copy/Paste instructions: -https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 - -CRB Specification and use NX -============================ - -Applications should format requests to the co-processor using the -co-processor Request Block (CRBs). Refer NX-GZIP user's manual for the format -of CRB and use NX from userspace such as sending requests and checking -request status. - -NX Fault handling -================= - -Applications send requests to NX and wait for the status by polling on -co-processor Status Block (CSB) flags. NX updates status in CSB after each -request is processed. Refer NX-GZIP user's manual for the format of CSB and -status flags. 
- -In case if NX encounters translation error (called NX page fault) on CSB -address or any request buffer, raises an interrupt on the CPU to handle the -fault. Page fault can happen if an application passes invalid addresses or -request buffers are not in memory. The operating system handles the fault by -updating CSB with the following data:: - - csb.flags = CSB_V; - csb.cc = CSB_CC_FAULT_ADDRESS; - csb.ce = CSB_CE_TERMINATION; - csb.address = fault_address; - -When an application receives translation error, it can touch or access -the page that has a fault address so that this page will be in memory. Then -the application can resend this request to NX. - -If the OS can not update CSB due to invalid CSB address, sends SEGV signal -to the process who opened the send window on which the original request was -issued. This signal returns with the following siginfo struct:: - - siginfo.si_signo = SIGSEGV; - siginfo.si_errno = EFAULT; - siginfo.si_code = SEGV_MAPERR; - siginfo.si_addr = CSB address; - -In the case of multi-thread applications, NX send windows can be shared -across all threads. For example, a child thread can open a send window, -but other threads can send requests to NX using this window. These -requests will be successful even in the case of OS handling faults as long -as CSB address is valid. If the NX request contains an invalid CSB address, -the signal will be sent to the child thread that opened the window. But if -the thread is exited without closing the window and the request is issued -using this window. the signal will be issued to the thread group leader -(tgid). It is up to the application whether to ignore or handle these -signals. 
- -NX-GZIP User's Manual: -https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf - -Simple example -============== - - :: - - int use_nx_gzip() - { - int rc, fd; - void *addr; - struct vas_setup_attr txattr; - - fd = open("/dev/crypto/nx-gzip", O_RDWR); - if (fd < 0) { - fprintf(stderr, "open nx-gzip failed\n"); - return -1; - } - memset(&txattr, 0, sizeof(txattr)); - txattr.version = 1; - txattr.vas_id = -1 - rc = ioctl(fd, VAS_TX_WIN_OPEN, - (unsigned long)&txattr); - if (rc < 0) { - fprintf(stderr, "ioctl() n %d, error %d\n", - rc, errno); - return rc; - } - addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, - MAP_SHARED, fd, 0ULL); - if (addr == MAP_FAILED) { - fprintf(stderr, "mmap() failed, errno %d\n", - errno); - return -errno; - } - do { - //Format CRB request with compression or - //uncompression - // Refer tests for vas_copy/vas_paste - vas_copy((&crb, 0, 1); - vas_paste(addr, 0, 1); - // Poll on csb.flags with timeout - // csb address is listed in CRB - } while (true) - close(fd) or window can be closed upon process exit - } - - Refer https://github.com/libnxz/power-gzip for tests or more - use cases. diff --git a/Documentation/powerpc/vcpudispatch_stats.rst b/Documentation/powerpc/vcpudispatch_stats.rst deleted file mode 100644 index 5704657a59..0000000000 --- a/Documentation/powerpc/vcpudispatch_stats.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -======================== -VCPU Dispatch Statistics -======================== - -For Shared Processor LPARs, the POWER Hypervisor maintains a relatively -static mapping of the LPAR processors (vcpus) to physical processor -chips (representing the "home" node) and tries to always dispatch vcpus -on their associated physical processor chip. However, under certain -scenarios, vcpus may be dispatched on a different processor chip (away -from its home node). - -/proc/powerpc/vcpudispatch_stats can be used to obtain statistics -related to the vcpu dispatch behavior. 
Writing '1' to this file enables -collecting the statistics, while writing '0' disables the statistics. -By default, the DTLB log for each vcpu is processed 50 times a second so -as not to miss any entries. This processing frequency can be changed -through /proc/powerpc/vcpudispatch_stats_freq. - -The statistics themselves are available by reading the procfs file -/proc/powerpc/vcpudispatch_stats. Each line in the output corresponds to -a vcpu as represented by the first field, followed by 8 numbers. - -The first number corresponds to: - -1. total vcpu dispatches since the beginning of statistics collection - -The next 4 numbers represent vcpu dispatch dispersions: - -2. number of times this vcpu was dispatched on the same processor as last - time -3. number of times this vcpu was dispatched on a different processor core - as last time, but within the same chip -4. number of times this vcpu was dispatched on a different chip -5. number of times this vcpu was dispatches on a different socket/drawer - (next numa boundary) - -The final 3 numbers represent statistics in relation to the home node of -the vcpu: - -6. number of times this vcpu was dispatched in its home node (chip) -7. number of times this vcpu was dispatched in a different node -8. 
number of times this vcpu was dispatched in a node further away (numa - distance) - -An example output:: - - $ sudo cat /proc/powerpc/vcpudispatch_stats - cpu0 6839 4126 2683 30 0 6821 18 0 - cpu1 2515 1274 1229 12 0 2509 6 0 - cpu2 2317 1198 1109 10 0 2312 5 0 - cpu3 2259 1165 1088 6 0 2256 3 0 - cpu4 2205 1143 1056 6 0 2202 3 0 - cpu5 2165 1121 1038 6 0 2162 3 0 - cpu6 2183 1127 1050 6 0 2180 3 0 - cpu7 2193 1133 1052 8 0 2187 6 0 - cpu8 2165 1115 1032 18 0 2156 9 0 - cpu9 2301 1252 1033 16 0 2293 8 0 - cpu10 2197 1138 1041 18 0 2187 10 0 - cpu11 2273 1185 1062 26 0 2260 13 0 - cpu12 2186 1125 1043 18 0 2177 9 0 - cpu13 2161 1115 1030 16 0 2153 8 0 - cpu14 2206 1153 1033 20 0 2196 10 0 - cpu15 2163 1115 1032 16 0 2155 8 0 - -In the output above, for vcpu0, there have been 6839 dispatches since -statistics were enabled. 4126 of those dispatches were on the same -physical cpu as the last time. 2683 were on a different core, but within -the same chip, while 30 dispatches were on a different chip compared to -its last dispatch. - -Also, out of the total of 6839 dispatches, we see that there have been -6821 dispatches on the vcpu's home node, while 18 dispatches were -outside its home node, on a neighbouring chip. diff --git a/Documentation/powerpc/vmemmap_dedup.rst b/Documentation/powerpc/vmemmap_dedup.rst deleted file mode 100644 index dc4db59fdf..0000000000 --- a/Documentation/powerpc/vmemmap_dedup.rst +++ /dev/null @@ -1,101 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -========== -Device DAX -========== - -The device-dax interface uses the tail deduplication technique explained in -Documentation/mm/vmemmap_dedup.rst - -On powerpc, vmemmap deduplication is only used with radix MMU translation. Also -with a 64K page size, only the devdax namespace with 1G alignment uses vmemmap -deduplication. - -With 2M PMD level mapping, we require 32 struct pages and a single 64K vmemmap -page can contain 1024 struct pages (64K/sizeof(struct page)). 
Hence there is no -vmemmap deduplication possible. - -With 1G PUD level mapping, we require 16384 struct pages and a single 64K -vmemmap page can contain 1024 struct pages (64K/sizeof(struct page)). Hence we -require 16 64K pages in vmemmap to map the struct page for 1G PUD level mapping. - -Here's how things look like on device-dax after the sections are populated:: - +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ - | | | 0 | -------------> | 0 | - | | +-----------+ +-----------+ - | | | 1 | -------------> | 1 | - | | +-----------+ +-----------+ - | | | 2 | ----------------^ ^ ^ ^ ^ ^ - | | +-----------+ | | | | | - | | | 3 | ------------------+ | | | | - | | +-----------+ | | | | - | | | 4 | --------------------+ | | | - | PUD | +-----------+ | | | - | level | | . | ----------------------+ | | - | mapping | +-----------+ | | - | | | . | ------------------------+ | - | | +-----------+ | - | | | 15 | --------------------------+ - | | +-----------+ - | | - | | - | | - +-----------+ - - -With 4K page size, 2M PMD level mapping requires 512 struct pages and a single -4K vmemmap page contains 64 struct pages(4K/sizeof(struct page)). Hence we -require 8 4K pages in vmemmap to map the struct page for 2M pmd level mapping. 
- -Here's how things look like on device-dax after the sections are populated:: - - +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ - | | | 0 | -------------> | 0 | - | | +-----------+ +-----------+ - | | | 1 | -------------> | 1 | - | | +-----------+ +-----------+ - | | | 2 | ----------------^ ^ ^ ^ ^ ^ - | | +-----------+ | | | | | - | | | 3 | ------------------+ | | | | - | | +-----------+ | | | | - | | | 4 | --------------------+ | | | - | PMD | +-----------+ | | | - | level | | 5 | ----------------------+ | | - | mapping | +-----------+ | | - | | | 6 | ------------------------+ | - | | +-----------+ | - | | | 7 | --------------------------+ - | | +-----------+ - | | - | | - | | - +-----------+ - -With 1G PUD level mapping, we require 262144 struct pages and a single 4K -vmemmap page can contain 64 struct pages (4K/sizeof(struct page)). Hence we -require 4096 4K pages in vmemmap to map the struct pages for 1G PUD level -mapping. - -Here's how things look like on device-dax after the sections are populated:: - - +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+ - | | | 0 | -------------> | 0 | - | | +-----------+ +-----------+ - | | | 1 | -------------> | 1 | - | | +-----------+ +-----------+ - | | | 2 | ----------------^ ^ ^ ^ ^ ^ - | | +-----------+ | | | | | - | | | 3 | ------------------+ | | | | - | | +-----------+ | | | | - | | | 4 | --------------------+ | | | - | PUD | +-----------+ | | | - | level | | . | ----------------------+ | | - | mapping | +-----------+ | | - | | | . | ------------------------+ | - | | +-----------+ | - | | | 4095 | --------------------------+ - | | +-----------+ - | | - | | - | | - +-----------+ diff --git a/Documentation/process/7.AdvancedTopics.rst b/Documentation/process/7.AdvancedTopics.rst index bf7cbfb4ca..4329170433 100644 --- a/Documentation/process/7.AdvancedTopics.rst +++ b/Documentation/process/7.AdvancedTopics.rst @@ -146,6 +146,7 @@ pull. 
The git request-pull command can be helpful in this regard; it will format the request as other developers expect, and will also check to be sure that you have remembered to push those changes to the public server. +.. _development_advancedtopics_reviews: Reviewing patches ----------------- @@ -167,6 +168,12 @@ comments as questions rather than criticisms. Asking "how does the lock get released in this path?" will always work better than stating "the locking here is wrong." +Another technique that is useful in case of a disagreement is to ask for others +to chime in. If a discussion reaches a stalemate after a few exchanges, +then call for opinions of other reviewers or maintainers. Often those in +agreement with a reviewer remain silent unless called upon. +The opinion of multiple people carries exponentially more weight. + Different developers will review code from different points of view. Some are mostly concerned with coding style and whether code lines have trailing white space. Others will focus primarily on whether the change implemented @@ -176,3 +183,14 @@ security issues, duplication of code found elsewhere, adequate documentation, adverse effects on performance, user-space ABI changes, etc. All types of review, if they lead to better code going into the kernel, are welcome and worthwhile. + +There is no strict requirement to use specific tags like ``Reviewed-by``. +In fact reviews in plain English are more informative and encouraged +even when a tag is provided, e.g. "I looked at aspects A, B and C of this +submission and it looks good to me." +Some form of a review message or reply is obviously necessary otherwise +maintainers will not know that the reviewer has looked at the patch at all! + +Last but not least patch review may become a negative process, focused +on pointing out problems. Please throw in a compliment once in a while, +particularly for newbies! 
diff --git a/Documentation/process/backporting.rst b/Documentation/process/backporting.rst new file mode 100644 index 0000000000..e1a6ea0a1e --- /dev/null +++ b/Documentation/process/backporting.rst @@ -0,0 +1,604 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +Backporting and conflict resolution +=================================== + +:Author: Vegard Nossum + +.. contents:: + :local: + :depth: 3 + :backlinks: none + +Introduction +============ + +Some developers may never really have to deal with backporting patches, +merging branches, or resolving conflicts in their day-to-day work, so +when a merge conflict does pop up, it can be daunting. Luckily, +resolving conflicts is a skill like any other, and there are many useful +techniques you can use to make the process smoother and increase your +confidence in the result. + +This document aims to be a comprehensive, step-by-step guide to +backporting and conflict resolution. + +Applying the patch to a tree +============================ + +Sometimes the patch you are backporting already exists as a git commit, +in which case you just cherry-pick it directly using +``git cherry-pick``. However, if the patch comes from an email, as it +often does for the Linux kernel, you will need to apply it to a tree +using ``git am``. + +If you've ever used ``git am``, you probably already know that it is +quite picky about the patch applying perfectly to your source tree. In +fact, you've probably had nightmares about ``.rej`` files and trying to +edit the patch to make it apply. + +It is strongly recommended to instead find an appropriate base version +where the patch applies cleanly and *then* cherry-pick it over to your +destination tree, as this will make git output conflict markers and let +you resolve conflicts with the help of git and any other conflict +resolution tools you might prefer to use. 
For example, if you want to +apply a patch that just arrived on LKML to an older stable kernel, you +can apply it to the most recent mainline kernel and then cherry-pick it +to your older stable branch. + +It's generally better to use the exact same base as the one the patch +was generated from, but it doesn't really matter that much as long as it +applies cleanly and isn't too far from the original base. The only +problem with applying the patch to the "wrong" base is that it may pull +in more unrelated changes in the context of the diff when cherry-picking +it to the older branch. + +A good reason to prefer ``git cherry-pick`` over ``git am`` is that git +knows the precise history of an existing commit, so it will know when +code has moved around and changed the line numbers; this in turn makes +it less likely to apply the patch to the wrong place (which can result +in silent mistakes or messy conflicts). + +If you are using `b4`_ and you are applying the patch directly from an +email, you can use ``b4 am`` with the options ``-g``/``--guess-base`` +and ``-3``/``--prep-3way`` to do some of this automatically (see the +`b4 presentation`_ for more information). However, the rest of this +article will assume that you are doing a plain ``git cherry-pick``. + +.. _b4: https://people.kernel.org/monsieuricon/introducing-b4-and-patch-attestation +.. _b4 presentation: https://youtu.be/mF10hgVIx9o?t=2996 + +Once you have the patch in git, you can go ahead and cherry-pick it into +your source tree. Don't forget to cherry-pick with ``-x`` if you want a +written record of where the patch came from! + +Note that if you are submitting a patch for stable, the format is +slightly different; the first line after the subject line needs to be +either:: + + commit <upstream commit> upstream + +or:: + + [ Upstream commit <upstream commit> ] + +Resolving conflicts +=================== + +Uh-oh; the cherry-pick failed with a vaguely threatening message:: + + CONFLICT (content): Merge conflict + +What to do now? 
+ +In general, conflicts appear when the context of the patch (i.e., the +lines being changed and/or the lines surrounding the changes) doesn't +match what's in the tree you are trying to apply the patch *to*. + +For backports, what likely happened was that the branch you are +backporting from contains patches not in the branch you are backporting +to. However, the reverse is also possible. In any case, the result is a +conflict that needs to be resolved. + +If your attempted cherry-pick fails with a conflict, git automatically +edits the files to include so-called conflict markers showing you where +the conflict is and how the two branches have diverged. Resolving the +conflict typically means editing the end result in such a way that it +takes into account these other commits. + +Resolving the conflict can be done either by hand in a regular text +editor or using a dedicated conflict resolution tool. + +Many people prefer to use their regular text editor and edit the +conflict directly, as it may be easier to understand what you're doing +and to control the final result. There are definitely pros and cons to +each method, and sometimes there's value in using both. + +We will not cover using dedicated merge tools here beyond providing some +pointers to various tools that you could use: + +- `Emacs Ediff mode `__ +- `vimdiff/gvimdiff `__ +- `KDiff3 `__ +- `TortoiseMerge `__ +- `Meld `__ +- `P4Merge `__ +- `Beyond Compare `__ +- `IntelliJ `__ +- `VSCode `__ + +To configure git to work with these, see ``git mergetool --help`` or +the official `git-mergetool documentation`_. + +.. _git-mergetool documentation: https://git-scm.com/docs/git-mergetool + +Prerequisite patches +-------------------- + +Most conflicts happen because the branch you are backporting to is +missing some patches compared to the branch you are backporting *from*. 
+In the more general case (such as merging two independent branches), +development could have happened on either branch, or the branches have +simply diverged -- perhaps your older branch had some other backports +applied to it that themselves needed conflict resolutions, causing a +divergence. + +It's important to always identify the commit or commits that caused the +conflict, as otherwise you cannot be confident in the correctness of +your resolution. As an added bonus, especially if the patch is in an +area you're not that familiar with, the changelogs of these commits will +often give you the context to understand the code and potential problems +or pitfalls with your conflict resolution. + +git log +~~~~~~~ + +A good first step is to look at ``git log`` for the file that has the +conflict -- this is usually sufficient when there aren't a lot of +patches to the file, but may get confusing if the file is big and +frequently patched. You should run ``git log`` on the range of commits +between your currently checked-out branch (``HEAD``) and the parent of +the patch you are picking (``<commit>``), i.e.:: + + git log HEAD..<commit>^ -- <path> + +Even better, if you want to restrict this output to a single function +(because that's where the conflict appears), you can use the following +syntax:: + + git log -L:'\<function\>':<path> HEAD..<commit>^ + +.. note:: + The ``\<`` and ``\>`` around the function name ensure that the + matches are anchored on a word boundary. This is important, as this + part is actually a regex and git only follows the first match, so + if you use ``-L:thread_stack:kernel/fork.c`` it may only give you + results for the function ``try_release_thread_stack_to_cache`` even + though there are many other functions in that file containing the + string ``thread_stack`` in their names. 
+ +Another useful option for ``git log`` is ``-G``, which allows you to +filter on certain strings appearing in the diffs of the commits you are +listing:: + + git log -G'regex' HEAD..<commit>^ -- <path> + +This can also be a handy way to quickly find when something (e.g. a +function call or a variable) was changed, added, or removed. The search +string is a regular expression, which means you can potentially search +for more specific things like assignments to a specific struct member:: + + git log -G'\->index\>.*=' + +git blame +~~~~~~~~~ + +Another way to find prerequisite commits (albeit only the most recent +one for a given conflict) is to run ``git blame``. In this case, you +need to run it against the parent commit of the patch you are +cherry-picking and the file where the conflict appeared, i.e.:: + + git blame <commit>^ -- <path> + +This command also accepts the ``-L`` argument (for restricting the +output to a single function), but in this case you specify the filename +at the end of the command as usual:: + + git blame -L:'\<function\>' <commit>^ -- <path> + +Navigate to the place where the conflict occurred. The first column of +the blame output is the commit ID of the patch that added a given line +of code. + +It might be a good idea to ``git show`` these commits and see if they +look like they might be the source of the conflict. Sometimes there will +be more than one of these commits, either because multiple commits +changed different lines of the same conflict area *or* because multiple +subsequent patches changed the same line (or lines) multiple times. In +the latter case, you may have to run ``git blame`` again and specify the +older version of the file to look at in order to dig further back in +the history of the file. + +Prerequisite vs. incidental patches +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Having found the patch that caused the conflict, you need to determine +whether it is a prerequisite for the patch you are backporting or +whether it is just incidental and can be skipped. 
An incidental patch +would be one that touches the same code as the patch you are +backporting, but does not change the semantics of the code in any +material way. For example, a whitespace cleanup patch is completely +incidental -- likewise, a patch that simply renames a function or a +variable would be incidental as well. On the other hand, if the function +being changed does not even exist in your current branch then this would +not be incidental at all and you need to carefully consider whether the +patch adding the function should be cherry-picked first. + +If you find that there is a necessary prerequisite patch, then you need +to stop and cherry-pick that instead. If you've already resolved some +conflicts in a different file and don't want to do it again, you can +create a temporary copy of that file. + +To abort the current cherry-pick, go ahead and run +``git cherry-pick --abort``, then restart the cherry-picking process +with the commit ID of the prerequisite patch instead. + +Understanding conflict markers +------------------------------ + +Combined diffs +~~~~~~~~~~~~~~ + +Let's say you've decided against picking (or reverting) additional +patches and you just want to resolve the conflict. Git will have +inserted conflict markers into your file. Out of the box, this will look +something like:: + + <<<<<<< HEAD + this is what's in your current tree before cherry-picking + ======= + this is what the patch wants it to be after cherry-picking + >>>>>>> ... title + +This is what you would see if you opened the file in your editor. +However, if you were to run ``git diff`` without any arguments, the +output would look something like this:: + + $ git diff + [...] + ++<<<<<<<< HEAD + +this is what's in your current tree before cherry-picking + ++======== + + this is what the patch wants it to be after cherry-picking + ++>>>>>>>> ... title + +When you are resolving a conflict, the behavior of ``git diff`` differs +from its normal behavior. 
Notice the two columns of diff markers +instead of the usual one; this is a so-called "`combined diff`_", here +showing the 3-way diff (or diff-of-diffs) between + +#. the current branch (before cherry-picking) and the current working + directory, and +#. the current branch (before cherry-picking) and the file as it looks + after the original patch has been applied. + +.. _combined diff: https://git-scm.com/docs/diff-format#_combined_diff_format + + +Better diffs +~~~~~~~~~~~~ + +3-way combined diffs include all the other changes that happened to the +file between your current branch and the branch you are cherry-picking +from. While this is useful for spotting other changes that you need to +take into account, this also makes the output of ``git diff`` somewhat +intimidating and difficult to read. You may instead prefer to run +``git diff HEAD`` (or ``git diff --ours``) which shows only the diff +between the current branch before cherry-picking and the current working +directory. It looks like this:: + + $ git diff HEAD + [...] + +<<<<<<<< HEAD + this is what's in your current tree before cherry-picking + +======== + +this is what the patch wants it to be after cherry-picking + +>>>>>>>> ... title + +As you can see, this reads just like any other diff and makes it clear +which lines are in the current branch and which lines are being added +because they are part of the merge conflict or the patch being +cherry-picked. + +Merge styles and diff3 +~~~~~~~~~~~~~~~~~~~~~~ + +The default conflict marker style shown above is known as the ``merge`` +style. 
There is also another style available, known as the ``diff3`` +style, which looks like this:: + + <<<<<<< HEAD + this is what is in your current tree before cherry-picking + ||||||| parent of (title) + this is what the patch expected to find there + ======= + this is what the patch wants it to be after being applied + >>>>>>> (title) + +As you can see, this has 3 parts instead of 2, and includes what git +expected to find there but didn't. It is *highly recommended* to use +this conflict style as it makes it much clearer what the patch actually +changed; i.e., it allows you to compare the before-and-after versions +of the file for the commit you are cherry-picking. This allows you to +make better decisions about how to resolve the conflict. + +To change conflict marker styles, you can use the following command:: + + git config merge.conflictStyle diff3 + +There is a third option, ``zdiff3``, introduced in `Git 2.35`_, +which has the same 3 sections as ``diff3``, but where common lines have +been trimmed off, making the conflict area smaller in some cases. + +.. _Git 2.35: https://github.blog/2022-01-24-highlights-from-git-2-35/ + +Iterating on conflict resolutions +--------------------------------- + +The first step in any conflict resolution process is to understand the +patch you are backporting. For the Linux kernel this is especially +important, since an incorrect change can lead to the whole system +crashing -- or worse, an undetected security vulnerability. + +Understanding the patch can be easy or difficult depending on the patch +itself, the changelog, and your familiarity with the code being changed. +However, a good question for every change (or every hunk of the patch) +might be: "Why is this hunk in the patch?" The answers to these +questions will inform your conflict resolution. 
+ +Resolution process +~~~~~~~~~~~~~~~~~~ + +Sometimes the easiest thing to do is to just remove all but the first +part of the conflict, leaving the file essentially unchanged, and apply +the changes by hand. Perhaps the patch is changing a function call +argument from ``0`` to ``1`` while a conflicting change added an +entirely new (and insignificant) parameter to the end of the parameter +list; in that case, it's easy enough to change the argument from ``0`` +to ``1`` by hand and leave the rest of the arguments alone. This +technique of manually applying changes is mostly useful if the conflict +pulled in a lot of unrelated context that you don't really need to care +about. + +For particularly nasty conflicts with many conflict markers, you can use +``git add`` or ``git add -i`` to selectively stage your resolutions to +get them out of the way; this also lets you use ``git diff HEAD`` to +always see what remains to be resolved or ``git diff --cached`` to see +what your patch looks like so far. + +Dealing with file renames +~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the most annoying things that can happen while backporting a +patch is discovering that one of the files being patched has been +renamed, as that typically means git won't even put in conflict markers, +but will just throw up its hands and say (paraphrased): "Unmerged path! +You do the work..." + +There are generally a few ways to deal with this. If the patch to the +renamed file is small, like a one-line change, the easiest thing is to +just go ahead and apply the change by hand and be done with it. On the +other hand, if the change is big or complicated, you definitely don't +want to do it by hand. 
+ +As a first pass, you can try something like this, which will lower the +rename detection threshold to 30% (by default, git uses 50%, meaning +that two files need to have at least 50% in common for it to consider +an add-delete pair to be a potential rename):: + + git cherry-pick --strategy=recursive -Xrename-threshold=30 <commit> + +Sometimes the right thing to do will be to also backport the patch that +did the rename, but that's definitely not the most common case. Instead, +what you can do is to temporarily rename the file in the branch you're +backporting to (using ``git mv`` and committing the result), restart the +attempt to cherry-pick the patch, rename the file back (``git mv`` and +committing again), and finally squash the result using ``git rebase -i`` +(see the `rebase tutorial`_) so it appears as a single commit when you +are done. + +.. _rebase tutorial: https://medium.com/@slamflipstrom/a-beginners-guide-to-squashing-commits-with-git-rebase-8185cf6e62ec + +Gotchas +------- + +Function arguments +~~~~~~~~~~~~~~~~~~ + +Pay attention to changing function arguments! It's easy to gloss over +details and think that two lines are the same but actually they differ +in some small detail like which variable was passed as an argument +(especially if the two variables are both a single character that look +the same, like i and j). + +Error handling +~~~~~~~~~~~~~~ + +If you cherry-pick a patch that includes a ``goto`` statement (typically +for error handling), it is absolutely imperative to double check that +the target label is still correct in the branch you are backporting to. +The same goes for added ``return``, ``break``, and ``continue`` +statements. + +Error handling is typically located at the bottom of the function, so it +may not be part of the conflict even though it could have been changed by +other patches. 
+ +A good way to ensure that you review the error paths is to always use +``git diff -W`` and ``git show -W`` (AKA ``--function-context``) when +inspecting your changes. For C code, this will show you the whole +function that's being changed in a patch. One of the things that often +go wrong during backports is that something else in the function changed +on either of the branches that you're backporting from or to. By +including the whole function in the diff you get more context and can +more easily spot problems that might otherwise go unnoticed. + +Refactored code +~~~~~~~~~~~~~~~ + +Something that happens quite often is that code gets refactored by +"factoring out" a common code sequence or pattern into a helper +function. When backporting patches to an area where such a refactoring +has taken place, you effectively need to do the reverse when +backporting: a patch to a single location may need to be applied to +multiple locations in the backported version. (One giveaway for this +scenario is that a function was renamed -- but that's not always the +case.) + +To avoid incomplete backports, it's worth trying to figure out if the +patch fixes a bug that appears in more than one place. One way to do +this would be to use ``git grep``. (This is actually a good idea to do +in general, not just for backports.) If you do find that the same kind +of fix would apply to other places, it's also worth seeing if those +places exist upstream -- if they don't, it's likely the patch may need +to be adjusted. ``git log`` is your friend to figure out what happened +to these areas as ``git blame`` won't show you code that has been +removed. + +If you do find other instances of the same pattern in the upstream tree +and you're not sure whether it's also a bug, it may be worth asking the +patch author. It's not uncommon to find new bugs during backporting! 
+ +Verifying the result +==================== + +colordiff +--------- + +Having committed a conflict-free new patch, you can now compare your +patch to the original patch. It is highly recommended that you use a +tool such as `colordiff`_ that can show two files side by side and color +them according to the changes between them:: + + colordiff -yw -W 200 <(git diff -W ^-) <(git diff -W HEAD^-) | less -SR + +.. _colordiff: https://www.colordiff.org/ + +Here, ``-y`` means to do a side-by-side comparison; ``-w`` ignores +whitespace, and ``-W 200`` sets the width of the output (as otherwise it +will use 130 by default, which is often a bit too little). + +The ``rev^-`` syntax is a handy shorthand for ``rev^..rev``, essentially +giving you just the diff for that single commit; also see +the official `git rev-parse documentation`_. + +.. _git rev-parse documentation: https://git-scm.com/docs/git-rev-parse#_other_rev_parent_shorthand_notations + +Again, note the inclusion of ``-W`` for ``git diff``; this ensures that +you will see the full function for any function that has changed. + +One incredibly important thing that colordiff does is to highlight lines +that are different. For example, if an error-handling ``goto`` has +changed labels between the original and backported patch, colordiff will +show these side-by-side but highlighted in a different color. Thus, it +is easy to see that the two ``goto`` statements are jumping to different +labels. Likewise, lines that were not modified by either patch but +differ in the context will also be highlighted and thus stand out during +a manual inspection. + +Of course, this is just a visual inspection; the real test is building +and running the patched kernel (or program). + +Build testing +------------- + +We won't cover runtime testing here, but it can be a good idea to build +just the files touched by the patch as a quick sanity check. 
For the +Linux kernel you can build single files like this, assuming you have the +``.config`` and build environment set up correctly:: + + make path/to/file.o + +Note that this won't discover linker errors, so you should still do a +full build after verifying that the single file compiles. By compiling +the single file first you can avoid having to wait for a full build *in +case* there are compiler errors in any of the files you've changed. + +Runtime testing +--------------- + +Even a successful build or boot test is not necessarily enough to rule +out a missing dependency somewhere. Even though the chances are small, +there could be code changes where two independent changes to the same +file result in no conflicts, no compile-time errors, and runtime errors +only in exceptional cases. + +One concrete example of this was a pair of patches to the system call +entry code where the first patch saved/restored a register and a later +patch made use of the same register somewhere in the middle of this +sequence. Since there was no overlap between the changes, one could +cherry-pick the second patch, have no conflicts, and believe that +everything was fine, when in fact the code was now scribbling over an +unsaved register. + +Although the vast majority of errors will be caught during compilation +or by superficially exercising the code, the only way to *really* verify +a backport is to review the final patch with the same level of scrutiny +as you would (or should) give to any other patch. Having unit tests and +regression tests or other types of automatic testing can help increase +the confidence in the correctness of a backport. + +Submitting backports to stable +============================== + +As the stable maintainers try to cherry-pick mainline fixes onto their +stable kernels, they may send out emails asking for backports when +encountering conflicts, see e.g. +. 
+These emails typically include the exact steps you need to cherry-pick +the patch to the correct tree and submit the patch. + +One thing to make sure is that your changelog conforms to the expected +format:: + + + + [ Upstream commit ] + + + [ ] + Signed-off-by: + +The "Upstream commit" line is sometimes slightly different depending on +the stable version. Older version used this format:: + + commit upstream. + +It is most common to indicate the kernel version the patch applies to +in the email subject line (using e.g. +``git send-email --subject-prefix='PATCH 6.1.y'``), but you can also put +it in the Signed-off-by:-area or below the ``---`` line. + +The stable maintainers expect separate submissions for each active +stable version, and each submission should also be tested separately. + +A few final words of advice +=========================== + +1) Approach the backporting process with humility. +2) Understand the patch you are backporting; this means reading both + the changelog and the code. +3) Be honest about your confidence in the result when submitting the + patch. +4) Ask relevant maintainers for explicit acks. + +Examples +======== + +The above shows roughly the idealized process of backporting a patch. +For a more concrete example, see this video tutorial where two patches +are backported from mainline to stable: +`Backporting Linux Kernel Patches`_. + +.. _Backporting Linux Kernel Patches: https://youtu.be/sBR7R1V2FeA diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index b48da698d6..bb96ca0f77 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils. 
====================== =============== ======================================== GNU C 5.1 gcc --version Clang/LLVM (optional) 11.0.0 clang --version -Rust (optional) 1.71.1 rustc --version +Rust (optional) 1.73.0 rustc --version bindgen (optional) 0.65.1 bindgen --version GNU make 3.82 make --version bash 4.2 bash --version diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index b501cd9770..a1daa309b5 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -66,12 +66,13 @@ lack of a better place. :maxdepth: 1 applying-patches + backporting adding-syscalls magic-number volatile-considered-harmful botching-up-ioctls clang-format - ../riscv/patch-acceptance + ../arch/riscv/patch-acceptance ../core-api/unaligned-memory-access .. only:: subproject and html diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 09dcf6377c..84ee60fcee 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -193,9 +193,23 @@ Review timelines Generally speaking, the patches get triaged quickly (in less than 48h). But be patient, if your patch is active in patchwork (i.e. it's listed on the project's patch list) the chances it was missed are close to zero. -Asking the maintainer for status updates on your -patch is a good way to ensure your patch is ignored or pushed to the -bottom of the priority list. + +The high volume of development on netdev makes reviewers move on +from discussions relatively quickly. New comments and replies +are very unlikely to arrive after a week of silence. If a patch +is no longer active in patchwork and the thread went idle for more +than a week - clarify the next steps and/or post the next version. + +For RFC postings specifically, if nobody responded in a week - reviewers +either missed the posting or have no strong opinions. If the code is ready, +repost as a PATCH. 
+ +Emails saying just "ping" or "bump" are considered rude. If you can't figure +out the status of the patch from patchwork or where the discussion has +landed - describe your best guess and ask if it's correct. For example:: + + I don't understand what the next steps are. Person X seems to be unhappy + with A, should I do B and repost the patches? .. _Changes requested: @@ -441,6 +455,21 @@ in a way which would break what would normally be considered uAPI. new ``netdevsim`` features must be accompanied by selftests under ``tools/testing/selftests/``. +Reviewer guidance +----------------- + +Reviewing other people's patches on the list is highly encouraged, +regardless of the level of expertise. For general guidance and +helpful tips please see :ref:`development_advancedtopics_reviews`. + +It's safe to assume that netdev maintainers know the community and the level +of expertise of the reviewers. The reviewers should not be concerned about +their comments impeding or derailing the patch flow. + +Less experienced reviewers are highly encouraged to do more in-depth +review of submissions and not focus exclusively on trivial or subjective +matters like code formatting, tags etc. + Testimonials / feedback ----------------------- diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 5a6993795b..692a3ba56c 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -66,15 +66,32 @@ lifted, in perpetuity. Coordination with other groups ------------------------------ -The kernel security team strongly recommends that reporters of potential -security issues NEVER contact the "linux-distros" mailing list until -AFTER discussing it with the kernel security team. Do not Cc: both -lists at once. You may contact the linux-distros mailing list after a -fix has been agreed on and you fully understand the requirements that -doing so will impose on you and the kernel community. 
- -The different lists have different goals and the linux-distros rules do -not contribute to actually fixing any potential security problems. +While the kernel security team solely focuses on getting bugs fixed, +other groups focus on fixing issues in distros and coordinating +disclosure between operating system vendors. Coordination is usually +handled by the "linux-distros" mailing list and disclosure by the +public "oss-security" mailing list, both of which are closely related +and presented in the linux-distros wiki: + + +Please note that the respective policies and rules are different since +the 3 lists pursue different goals. Coordinating between the kernel +security team and other teams is difficult since for the kernel security +team occasional embargoes (as subject to a maximum allowed number of +days) start from the availability of a fix, while for "linux-distros" +they start from the initial post to the list regardless of the +availability of a fix. + +As such, the kernel security team strongly recommends that as a reporter +of a potential security issue you DO NOT contact the "linux-distros" +mailing list UNTIL a fix is accepted by the affected code's maintainers +and you have read the distros wiki page above and you fully understand +the requirements that contacting "linux-distros" will impose on you and +the kernel community. This also means that in general it doesn't make +sense to Cc: both lists at once, except maybe for coordination if and +while an accepted fix has not yet been merged. In other words, until a +fix is accepted do not Cc: "linux-distros", and after it's merged do not +Cc: the kernel security team. 
CVE assignment -------------- diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 41f1e07abf..1704f1c686 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -101,6 +101,19 @@ comment: git cherry-pick fd21073 git cherry-pick + Note that for a patch series, you do not have to list as prerequisites the + patches present in the series itself. For example, if you have the following + patch series: + + .. code-block:: none + + patch1 + patch2 + + where patch2 depends on patch1, you do not have to list patch1 as + prerequisite of patch2 if you have already marked patch1 for stable + inclusion. + * For patches that may have kernel version prerequisites specify them using the following format in the sign-off area: diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index efac910e26..86d346bcb8 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -327,6 +327,8 @@ politely and address the problems they have pointed out. When sending a next version, add a ``patch changelog`` to the cover letter or to individual patches explaining difference against previous submission (see :ref:`the_canonical_patch_format`). +Notify people that commented on your patch about new versions by adding them to +the patches CC list. See Documentation/process/email-clients.rst for recommendations on email clients and mailing list etiquette. @@ -366,10 +368,10 @@ busy people and may not get to your patch right away. Once upon a time, patches used to disappear into the void without comment, but the development process works more smoothly than that now. You should -receive comments within a week or so; if that does not happen, make sure -that you have sent your patches to the right place. 
Wait for a minimum of -one week before resubmitting or pinging reviewers - possibly longer during -busy times like merge windows. +receive comments within a few weeks (typically 2-3); if that does not +happen, make sure that you have sent your patches to the right place. +Wait for a minimum of one week before resubmitting or pinging reviewers +- possibly longer during busy times like merge windows. It's also ok to resend the patch or the patch series after a couple of weeks with the word "RESEND" added to the subject line:: diff --git a/Documentation/riscv/acpi.rst b/Documentation/riscv/acpi.rst deleted file mode 100644 index 9870a28281..0000000000 --- a/Documentation/riscv/acpi.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -============== -ACPI on RISC-V -============== - -The ISA string parsing rules for ACPI are defined by `Version ASCIIDOC -Conversion, 12/2022 of the RISC-V specifications, as defined by tag -"riscv-isa-release-1239329-2023-05-23" (commit 1239329 -) `_ diff --git a/Documentation/riscv/boot-image-header.rst b/Documentation/riscv/boot-image-header.rst deleted file mode 100644 index df2ffc173e..0000000000 --- a/Documentation/riscv/boot-image-header.rst +++ /dev/null @@ -1,59 +0,0 @@ -================================= -Boot image header in RISC-V Linux -================================= - -:Author: Atish Patra -:Date: 20 May 2019 - -This document only describes the boot image header details for RISC-V Linux. 
- -The following 64-byte header is present in decompressed Linux kernel image:: - - u32 code0; /* Executable code */ - u32 code1; /* Executable code */ - u64 text_offset; /* Image load offset, little endian */ - u64 image_size; /* Effective Image size, little endian */ - u64 flags; /* kernel flags, little endian */ - u32 version; /* Version of this header */ - u32 res1 = 0; /* Reserved */ - u64 res2 = 0; /* Reserved */ - u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ - u32 magic2 = 0x05435352; /* Magic number 2, little endian, "RSC\x05" */ - u32 res3; /* Reserved for PE COFF offset */ - -This header format is compliant with PE/COFF header and largely inspired from -ARM64 header. Thus, both ARM64 & RISC-V header can be combined into one common -header in future. - -Notes -===== - -- This header is also reused to support EFI stub for RISC-V. EFI specification - needs PE/COFF image header in the beginning of the kernel image in order to - load it as an EFI application. In order to support EFI stub, code0 is replaced - with "MZ" magic string and res3(at offset 0x3c) points to the rest of the - PE/COFF header. - -- version field indicate header version number - - ========== ============= - Bits 0:15 Minor version - Bits 16:31 Major version - ========== ============= - - This preserves compatibility across newer and older version of the header. - The current version is defined as 0.2. - -- The "magic" field is deprecated as of version 0.2. In a future - release, it may be removed. This originally should have matched up - with the ARM64 header "magic" field, but unfortunately does not. - The "magic2" field replaces it, matching up with the ARM64 header. - -- In current header, the flags field has only one field. - - ===== ==================================== - Bit 0 Kernel endianness. 1 if BE, 0 if LE. - ===== ==================================== - -- Image size is mandatory for boot loader to load kernel image. Booting will - fail otherwise. 
diff --git a/Documentation/riscv/boot.rst b/Documentation/riscv/boot.rst deleted file mode 100644 index 6077b587a8..0000000000 --- a/Documentation/riscv/boot.rst +++ /dev/null @@ -1,169 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -=============================================== -RISC-V Kernel Boot Requirements and Constraints -=============================================== - -:Author: Alexandre Ghiti -:Date: 23 May 2023 - -This document describes what the RISC-V kernel expects from bootloaders and -firmware, and also the constraints that any developer must have in mind when -touching the early boot process. For the purposes of this document, the -``early boot process`` refers to any code that runs before the final virtual -mapping is set up. - -Pre-kernel Requirements and Constraints -======================================= - -The RISC-V kernel expects the following of bootloaders and platform firmware: - -Register state --------------- - -The RISC-V kernel expects: - - * ``$a0`` to contain the hartid of the current core. - * ``$a1`` to contain the address of the devicetree in memory. - -CSR state ---------- - -The RISC-V kernel expects: - - * ``$satp = 0``: the MMU, if present, must be disabled. - -Reserved memory for resident firmware -------------------------------------- - -The RISC-V kernel must not map any resident memory, or memory protected with -PMPs, in the direct mapping, so the firmware must correctly mark those regions -as per the devicetree specification and/or the UEFI specification. - -Kernel location ---------------- - -The RISC-V kernel expects to be placed at a PMD boundary (2MB aligned for rv64 -and 4MB aligned for rv32). Note that the EFI stub will physically relocate the -kernel if that's not the case. - -Hardware description --------------------- - -The firmware can pass either a devicetree or ACPI tables to the RISC-V kernel. 
- -The devicetree is either passed directly to the kernel from the previous stage -using the ``$a1`` register, or when booting with UEFI, it can be passed using the -EFI configuration table. - -The ACPI tables are passed to the kernel using the EFI configuration table. In -this case, a tiny devicetree is still created by the EFI stub. Please refer to -"EFI stub and devicetree" section below for details about this devicetree. - -Kernel entry ------------- - -On SMP systems, there are 2 methods to enter the kernel: - -- ``RISCV_BOOT_SPINWAIT``: the firmware releases all harts in the kernel, one hart - wins a lottery and executes the early boot code while the other harts are - parked waiting for the initialization to finish. This method is mostly used to - support older firmwares without SBI HSM extension and M-mode RISC-V kernel. -- ``Ordered booting``: the firmware releases only one hart that will execute the - initialization phase and then will start all other harts using the SBI HSM - extension. The ordered booting method is the preferred booting method for - booting the RISC-V kernel because it can support CPU hotplug and kexec. - -UEFI ----- - -UEFI memory map -~~~~~~~~~~~~~~~ - -When booting with UEFI, the RISC-V kernel will use only the EFI memory map to -populate the system memory. - -The UEFI firmware must parse the subnodes of the ``/reserved-memory`` devicetree -node and abide by the devicetree specification to convert the attributes of -those subnodes (``no-map`` and ``reusable``) into their correct EFI equivalent -(refer to section "3.5.4 /reserved-memory and UEFI" of the devicetree -specification v0.4-rc1). - -RISCV_EFI_BOOT_PROTOCOL -~~~~~~~~~~~~~~~~~~~~~~~ - -When booting with UEFI, the EFI stub requires the boot hartid in order to pass -it to the RISC-V kernel in ``$a1``. The EFI stub retrieves the boot hartid using -one of the following methods: - -- ``RISCV_EFI_BOOT_PROTOCOL`` (**preferred**). -- ``boot-hartid`` devicetree subnode (**deprecated**). 
- -Any new firmware must implement ``RISCV_EFI_BOOT_PROTOCOL`` as the devicetree -based approach is deprecated now. - -Early Boot Requirements and Constraints -======================================= - -The RISC-V kernel's early boot process operates under the following constraints: - -EFI stub and devicetree ------------------------ - -When booting with UEFI, the devicetree is supplemented (or created) by the EFI -stub with the same parameters as arm64 which are described at the paragraph -"UEFI kernel support on ARM" in Documentation/arch/arm/uefi.rst. - -Virtual mapping installation ----------------------------- - -The installation of the virtual mapping is done in 2 steps in the RISC-V kernel: - -1. ``setup_vm()`` installs a temporary kernel mapping in ``early_pg_dir`` which - allows discovery of the system memory. Only the kernel text/data are mapped - at this point. When establishing this mapping, no allocation can be done - (since the system memory is not known yet), so ``early_pg_dir`` page table is - statically allocated (using only one table for each level). - -2. ``setup_vm_final()`` creates the final kernel mapping in ``swapper_pg_dir`` - and takes advantage of the discovered system memory to create the linear - mapping. When establishing this mapping, the kernel can allocate memory but - cannot access it directly (since the direct mapping is not present yet), so - it uses temporary mappings in the fixmap region to be able to access the - newly allocated page table levels. - -For ``virt_to_phys()`` and ``phys_to_virt()`` to be able to correctly convert -direct mapping addresses to physical addresses, they need to know the start of -the DRAM. This happens after step 1, right before step 2 installs the direct -mapping (see ``setup_bootmem()`` function in arch/riscv/mm/init.c). Any usage of -those macros before the final virtual mapping is installed must be carefully -examined. 
- -Devicetree mapping via fixmap ------------------------------ - -As the ``reserved_mem`` array is initialized with virtual addresses established -by ``setup_vm()``, and used with the mapping established by -``setup_vm_final()``, the RISC-V kernel uses the fixmap region to map the -devicetree. This ensures that the devicetree remains accessible by both virtual -mappings. - -Pre-MMU execution ------------------ - -A few pieces of code need to run before even the first virtual mapping is -established. These are the installation of the first virtual mapping itself, -patching of early alternatives and the early parsing of the kernel command line. -That code must be very carefully compiled as: - -- ``-fno-pie``: This is needed for relocatable kernels which use ``-fPIE``, - since otherwise, any access to a global symbol would go through the GOT which - is only relocated virtually. -- ``-mcmodel=medany``: Any access to a global symbol must be PC-relative to - avoid any relocations to happen before the MMU is setup. -- *all* instrumentation must also be disabled (that includes KASAN, ftrace and - others). - -As using a symbol from a different compilation unit requires this unit to be -compiled with those flags, we advise, as much as possible, not to use external -symbols. diff --git a/Documentation/riscv/features.rst b/Documentation/riscv/features.rst deleted file mode 100644 index 36e90144ad..0000000000 --- a/Documentation/riscv/features.rst +++ /dev/null @@ -1,3 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. kernel-feat:: features riscv diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst deleted file mode 100644 index a52996b22f..0000000000 --- a/Documentation/riscv/hwprobe.rst +++ /dev/null @@ -1,98 +0,0 @@ -.. 
SPDX-License-Identifier: GPL-2.0 - -RISC-V Hardware Probing Interface ---------------------------------- - -The RISC-V hardware probing interface is based around a single syscall, which -is defined in :: - - struct riscv_hwprobe { - __s64 key; - __u64 value; - }; - - long sys_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, cpu_set_t *cpus, - unsigned int flags); - -The arguments are split into three groups: an array of key-value pairs, a CPU -set, and some flags. The key-value pairs are supplied with a count. Userspace -must prepopulate the key field for each element, and the kernel will fill in the -value if the key is recognized. If a key is unknown to the kernel, its key field -will be cleared to -1, and its value set to 0. The CPU set is defined by -CPU_SET(3). For value-like keys (eg. vendor/arch/impl), the returned value will -only be valid if all CPUs in the given set have the same value. Otherwise -1 -will be returned. For boolean-like keys, the value returned will be a logical -AND of the values for the specified CPUs. Usermode can supply NULL for cpus and -0 for cpu_count as a shortcut for all online CPUs. There are currently no flags, -this value must be zero for future compatibility. - -On success 0 is returned, on failure a negative error code is returned. - -The following keys are defined: - -* :c:macro:`RISCV_HWPROBE_KEY_MVENDORID`: Contains the value of ``mvendorid``, - as defined by the RISC-V privileged architecture specification. - -* :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as - defined by the RISC-V privileged architecture specification. - -* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as - defined by the RISC-V privileged architecture specification. - -* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base - user-visible behavior that this kernel supports.
The following base user ABIs - are defined: - - * :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: Support for rv32ima or - rv64ima, as defined by version 2.2 of the user ISA and version 1.10 of the - privileged ISA, with the following known exceptions (more exceptions may be - added, but only if it can be demonstrated that the user ABI is not broken): - - * The ``fence.i`` instruction cannot be directly executed by userspace - programs (it may still be executed in userspace via a - kernel-controlled mechanism such as the vDSO). - -* :c:macro:`RISCV_HWPROBE_KEY_IMA_EXT_0`: A bitmask containing the extensions - that are compatible with the :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: - base system behavior. - - * :c:macro:`RISCV_HWPROBE_IMA_FD`: The F and D extensions are supported, as - defined by commit cd20cee ("FMIN/FMAX now implement - minimumNumber/maximumNumber, not minNum/maxNum") of the RISC-V ISA manual. - - * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined - by version 2.2 of the RISC-V ISA manual. - - * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by - version 1.0 of the RISC-V Vector extension manual. - - * :c:macro:`RISCV_HWPROBE_EXT_ZBA`: The Zba address generation extension is - supported, as defined in version 1.0 of the Bit-Manipulation ISA - extensions. - - * :c:macro:`RISCV_HWPROBE_EXT_ZBB`: The Zbb extension is supported, as defined - in version 1.0 of the Bit-Manipulation ISA extensions. - - * :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined - in version 1.0 of the Bit-Manipulation ISA extensions. - -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. - - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. - - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. 
These accesses are - always extremely slow. - - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. - - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. - - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst deleted file mode 100644 index 4dab0cb4b9..0000000000 --- a/Documentation/riscv/index.rst +++ /dev/null @@ -1,24 +0,0 @@ -=================== -RISC-V architecture -=================== - -.. toctree:: - :maxdepth: 1 - - acpi - boot - boot-image-header - vm-layout - hwprobe - patch-acceptance - uabi - vector - - features - -.. only:: subproject and html - - Indices - ======= - - * :ref:`genindex` diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst deleted file mode 100644 index 634aa222b4..0000000000 --- a/Documentation/riscv/patch-acceptance.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -arch/riscv maintenance guidelines for developers -================================================ - -Overview --------- -The RISC-V instruction set architecture is developed in the open: -in-progress drafts are available for all to review and to experiment -with implementations. New module or extension drafts can change -during the development process - sometimes in ways that are -incompatible with previous drafts. This flexibility can present a -challenge for RISC-V Linux maintenance. Linux maintainers disapprove -of churn, and the Linux development process prefers well-reviewed and -tested code over experimental code. 
We wish to extend these same -principles to the RISC-V-related code that will be accepted for -inclusion in the kernel. - -Patchwork ---------- - -RISC-V has a patchwork instance, where the status of patches can be checked: - - https://patchwork.kernel.org/project/linux-riscv/list/ - -If your patch does not appear in the default view, the RISC-V maintainers have -likely either requested changes, or expect it to be applied to another tree. - -Automation runs against this patchwork instance, building/testing patches as -they arrive. The automation applies patches against the current HEAD of the -RISC-V `for-next` and `fixes` branches, depending on whether the patch has been -detected as a fix. Failing those, it will use the RISC-V `master` branch. -The exact commit to which a series has been applied will be noted on patchwork. -Patches for which any of the checks fail are unlikely to be applied and in most -cases will need to be resubmitted. - -Submit Checklist Addendum -------------------------- -We'll only accept patches for new modules or extensions if the -specifications for those modules or extensions are listed as being -unlikely to be incompatibly changed in the future. For -specifications from the RISC-V foundation this means "Frozen" or -"Ratified", for the UEFI forum specifications this means a published -ECR. (Developers may, of course, maintain their own Linux kernel trees -that contain code for any draft extensions that they wish.) - -Additionally, the RISC-V specification allows implementers to create -their own custom extensions. These custom extensions aren't required -to go through any review or ratification process by the RISC-V -Foundation. 
To avoid the maintenance complexity and potential -performance impact of adding kernel code for implementor-specific -RISC-V extensions, we'll only consider patches for extensions that either: - -- Have been officially frozen or ratified by the RISC-V Foundation, or -- Have been implemented in hardware that is widely available, per standard - Linux practice. - -(Implementers, may, of course, maintain their own Linux kernel trees containing -code for any custom extensions that they wish.) diff --git a/Documentation/riscv/uabi.rst b/Documentation/riscv/uabi.rst deleted file mode 100644 index 8960fac42c..0000000000 --- a/Documentation/riscv/uabi.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -RISC-V Linux User ABI -===================== - -ISA string ordering in /proc/cpuinfo ------------------------------------- - -The canonical order of ISA extension names in the ISA string is defined in -chapter 27 of the unprivileged specification. -The specification uses vague wording, such as should, when it comes to ordering, -so for our purposes the following rules apply: - -#. Single-letter extensions come first, in canonical order. - The canonical order is "IMAFDQLCBKJTPVH". - -#. All multi-letter extensions will be separated from other extensions by an - underscore. - -#. Additional standard extensions (starting with 'Z') will be sorted after - single-letter extensions and before any higher-privileged extensions. - -#. For additional standard extensions, the first letter following the 'Z' - conventionally indicates the most closely related alphabetical - extension category. If multiple 'Z' extensions are named, they will be - ordered first by category, in canonical order, as listed above, then - alphabetically within a category. - -#. Standard supervisor-level extensions (starting with 'S') will be listed - after standard unprivileged extensions. If multiple supervisor-level - extensions are listed, they will be ordered alphabetically. - -#. 
Standard machine-level extensions (starting with 'Zxm') will be listed - after any lower-privileged, standard extensions. If multiple machine-level - extensions are listed, they will be ordered alphabetically. - -#. Non-standard extensions (starting with 'X') will be listed after all standard - extensions. If multiple non-standard extensions are listed, they will be - ordered alphabetically. - -An example string following the order is:: - - rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux - -Misaligned accesses -------------------- - -Misaligned accesses are supported in userspace, but they may perform poorly. diff --git a/Documentation/riscv/vector.rst b/Documentation/riscv/vector.rst deleted file mode 100644 index 75dd88a62e..0000000000 --- a/Documentation/riscv/vector.rst +++ /dev/null @@ -1,140 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -========================================= -Vector Extension Support for RISC-V Linux -========================================= - -This document briefly outlines the interface provided to userspace by Linux in -order to support the use of the RISC-V Vector Extension. - -1. prctl() Interface ---------------------- - -Two new prctl() calls are added to allow programs to manage the enablement -status for the use of Vector in userspace. The intended usage guideline for -these interfaces is to give init systems a way to modify the availability of V -for processes running under its domain. Calling these interfaces is not -recommended in library routines because libraries should not override policies -configured from the parent process. Also, users must note that these interfaces -are not portable to non-Linux, nor non-RISC-V environments, so their use is discouraged -in portable code. To get the availability of V in an ELF program, -please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the -auxiliary vector.
- -* prctl(PR_RISCV_V_SET_CONTROL, unsigned long arg) - - Sets the Vector enablement status of the calling thread, where the control - argument consists of two 2-bit enablement statuses and a bit for inheritance - mode. Other threads of the calling process are unaffected. - - Enablement status is a tri-state value each occupying 2-bit of space in - the control argument: - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_DEFAULT`: Use the system-wide default - enablement status on execve(). The system-wide default setting can be - controlled via sysctl interface (see sysctl section below). - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_ON`: Allow Vector to be run for the - thread. - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_OFF`: Disallow Vector. Executing Vector - instructions under such condition will trap and cause the termination of the thread. - - arg: The control argument is a 5-bit value consisting of 3 parts, and - accessed by 3 masks respectively. - - The 3 masks, PR_RISCV_V_VSTATE_CTRL_CUR_MASK, - PR_RISCV_V_VSTATE_CTRL_NEXT_MASK, and PR_RISCV_V_VSTATE_CTRL_INHERIT - represent bit[1:0], bit[3:2], and bit[4]. bit[1:0] accounts for the - enablement status of current thread, and the setting at bit[3:2] takes place - at next execve(). bit[4] defines the inheritance mode of the setting in - bit[3:2]. - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_CUR_MASK`: bit[1:0]: Account for the - Vector enablement status for the calling thread. The calling thread is - not able to turn off Vector once it has been enabled. The prctl() call - fails with EPERM if the value in this mask is PR_RISCV_V_VSTATE_CTRL_OFF - but the current enablement status is not off. Setting - PR_RISCV_V_VSTATE_CTRL_DEFAULT here takes no effect but to set back - the original enablement status. - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_NEXT_MASK`: bit[3:2]: Account for the - Vector enablement setting for the calling thread at the next execve() - system call.
If PR_RISCV_V_VSTATE_CTRL_DEFAULT is used in this mask, - then the enablement status will be decided by the system-wide - enablement status when execve() happens. - - * :c:macro:`PR_RISCV_V_VSTATE_CTRL_INHERIT`: bit[4]: the inheritance - mode for the setting at PR_RISCV_V_VSTATE_CTRL_NEXT_MASK. If the bit - is set then the following execve() will not clear the setting in both - PR_RISCV_V_VSTATE_CTRL_NEXT_MASK and PR_RISCV_V_VSTATE_CTRL_INHERIT. - This setting persists across changes in the system-wide default value. - - Return value: - * 0 on success; - * EINVAL: Vector not supported, invalid enablement status for current or - next mask; - * EPERM: Turning off Vector in PR_RISCV_V_VSTATE_CTRL_CUR_MASK if Vector - was enabled for the calling thread. - - On success: - * A valid setting for PR_RISCV_V_VSTATE_CTRL_CUR_MASK takes place - immediately. The enablement status specified in - PR_RISCV_V_VSTATE_CTRL_NEXT_MASK happens at the next execve() call, or - all following execve() calls if PR_RISCV_V_VSTATE_CTRL_INHERIT bit is - set. - * Every successful call overwrites a previous setting for the calling - thread. - -* prctl(PR_RISCV_V_GET_CONTROL) - - Gets the same Vector enablement status for the calling thread. Setting for - next execve() call and the inheritance bit are all OR-ed together. - - Note that ELF programs are able to get the availability of V for itself by - reading :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the - auxiliary vector. - - Return value: - * a nonnegative value on success; - * EINVAL: Vector not supported. - -2.
System runtime configuration (sysctl) ------------------------------------------ - -To mitigate the ABI impact of expansion of the signal stack, a -policy mechanism is provided to the administrators, distro maintainers, and -developers to control the default Vector enablement status for userspace -processes in form of sysctl knob: - -* /proc/sys/abi/riscv_v_default_allow - - Writing the text representation of 0 or 1 to this file sets the default - system enablement status for new starting userspace programs. Valid values - are: - - * 0: Do not allow Vector code to be executed as the default for new processes. - * 1: Allow Vector code to be executed as the default for new processes. - - Reading this file returns the current system default enablement status. - - At every execve() call, a new enablement status of the new process is set to - the system default, unless: - - * PR_RISCV_V_VSTATE_CTRL_INHERIT is set for the calling process, and the - setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not - PR_RISCV_V_VSTATE_CTRL_DEFAULT. Or, - - * The setting in PR_RISCV_V_VSTATE_CTRL_NEXT_MASK is not - PR_RISCV_V_VSTATE_CTRL_DEFAULT. - - Modifying the system default enablement status does not affect the enablement - status of any existing process or thread that does not make an execve() call. - -3. Vector Register State Across System Calls ---------------------------------------------- - -As indicated by version 1.0 of the V extension [1], vector registers are -clobbered by system calls. - -1: https://github.com/riscv/riscv-v-spec/blob/master/calling-convention.adoc diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst deleted file mode 100644 index 69ff6da1db..0000000000 --- a/Documentation/riscv/vm-layout.rst +++ /dev/null @@ -1,157 +0,0 @@ -..
SPDX-License-Identifier: GPL-2.0 - -===================================== -Virtual Memory Layout on RISC-V Linux -===================================== - -:Author: Alexandre Ghiti -:Date: 12 February 2021 - -This document describes the virtual memory layout used by the RISC-V Linux -Kernel. - -RISC-V Linux Kernel 32bit -========================= - -RISC-V Linux Kernel SV32 ------------------------- - -TODO - -RISC-V Linux Kernel 64bit -========================= - -The RISC-V privileged architecture document states that the 64bit addresses -"must have bits 63–48 all equal to bit 47, or else a page-fault exception will -occur.": that splits the virtual address space into 2 halves separated by a very -big hole, the lower half is where the userspace resides, the upper half is where -the RISC-V Linux Kernel resides. - -RISC-V Linux Kernel SV39 ------------------------- - -:: - - ======================================================================================================================== - Start addr | Offset | End addr | Size | VM area description - ======================================================================================================================== - | | | | - 0000000000000000 | 0 | 0000003fffffffff | 256 GB | user-space virtual memory, different per mm - __________________|____________|__________________|_________|___________________________________________________________ - | | | | - 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical - | | | | virtual memory addresses up to the -256 GB - | | | | starting offset of kernel mappings. 
- __________________|____________|__________________|_________|___________________________________________________________ - | - | Kernel-space virtual memory, shared between all processes: - ____________________________________________________________|___________________________________________________________ - | | | | - ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap - ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io - ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap - ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space - ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory - fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan - __________________|____________|__________________|_________|____________________________________________________________ - | - | - ____________________________________________________________|____________________________________________________________ - | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel - __________________|____________|__________________|_________|____________________________________________________________ - - -RISC-V Linux Kernel SV48 ------------------------- - -:: - - ======================================================================================================================== - Start addr | Offset | End addr | Size | VM area description - ======================================================================================================================== - | | | | - 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm - __________________|____________|__________________|_________|___________________________________________________________ - | | | | - 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... 
huge, almost 64 bits wide hole of non-canonical - | | | | virtual memory addresses up to the -128 TB - | | | | starting offset of kernel mappings. - __________________|____________|__________________|_________|___________________________________________________________ - | - | Kernel-space virtual memory, shared between all processes: - ____________________________________________________________|___________________________________________________________ - | | | | - ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap - ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io - ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap - ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space - ffffaf8000000000 | -80.5 TB | ffffef7fffffffff | 64 TB | direct mapping of all physical memory - ffffef8000000000 | -16.5 TB | fffffffeffffffff | 16.5 TB | kasan - __________________|____________|__________________|_________|____________________________________________________________ - | - | Identical layout to the 39-bit one from here on: - ____________________________________________________________|____________________________________________________________ - | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel - __________________|____________|__________________|_________|____________________________________________________________ - - -RISC-V Linux Kernel SV57 ------------------------- - -:: - - ======================================================================================================================== - Start addr | Offset | End addr | Size | VM area description - ======================================================================================================================== - | | | | - 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm - 
__________________|____________|__________________|_________|___________________________________________________________ - | | | | - 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, almost 64 bits wide hole of non-canonical - | | | | virtual memory addresses up to the -64 PB - | | | | starting offset of kernel mappings. - __________________|____________|__________________|_________|___________________________________________________________ - | - | Kernel-space virtual memory, shared between all processes: - ____________________________________________________________|___________________________________________________________ - | | | | - ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap - ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io - ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap - ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space - ff60000000000000 | -40 PB | ffdeffffffffffff | 32 PB | direct mapping of all physical memory - ffdf000000000000 | -8 PB | fffffffeffffffff | 8 PB | kasan - __________________|____________|__________________|_________|____________________________________________________________ - | - | Identical layout to the 39-bit one from here on: - ____________________________________________________________|____________________________________________________________ - | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel - __________________|____________|__________________|_________|____________________________________________________________ - - -Userspace VAs --------------------- -To maintain compatibility with software that relies on the VA space with a -maximum of 48 bits the kernel will, by default, return virtual addresses to -userspace from a 48-bit range (sv48). This default behavior is achieved by -passing 0 into the hint address parameter of mmap. 
On CPUs with an address space -smaller than sv48, the CPU maximum supported address space will be the default. - -Software can "opt-in" to receiving VAs from another VA space by providing -a hint address to mmap. A hint address passed to mmap will cause the largest -address space that fits entirely into the hint to be used, unless there is no -space left in the address space. If there is no space available in the requested -address space, an address in the next smallest available address space will be -returned. - -For example, in order to obtain 48-bit VA space, a hint address greater than -:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace -ending at :code:`1 << 47` and the addresses beyond this are reserved for the -kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater -than or equal to :code:`1 << 56` must be provided. diff --git a/Documentation/rust/index.rst b/Documentation/rust/index.rst index e599be2cec..965f2db529 100644 --- a/Documentation/rust/index.rst +++ b/Documentation/rust/index.rst @@ -6,6 +6,25 @@ Rust Documentation related to Rust within the kernel. To start using Rust in the kernel, please read the quick-start.rst guide. + +The Rust experiment +------------------- + +The Rust support was merged in v6.1 into mainline in order to help in +determining whether Rust as a language was suitable for the kernel, i.e. worth +the tradeoffs. + +Currently, the Rust support is primarily intended for kernel developers and +maintainers interested in the Rust support, so that they can start working on +abstractions and drivers, as well as helping the development of infrastructure +and tools. + +If you are an end user, please note that there are currently no in-tree +drivers/modules suitable or intended for production use, and that the Rust +support is still in development/experimental, especially for certain kernel +configurations. + + .. only:: rustdoc and html You can also browse `rustdoc documentation `_. 
diff --git a/Documentation/scheduler/sched-arch.rst b/Documentation/scheduler/sched-arch.rst index 505cd27f9a..ed07efea7d 100644 --- a/Documentation/scheduler/sched-arch.rst +++ b/Documentation/scheduler/sched-arch.rst @@ -10,7 +10,7 @@ Context switch By default, the switch_to arch function is called with the runqueue locked. This is usually not a problem unless switch_to may need to take the runqueue lock. This is usually due to a wake up operation in -the context switch. See arch/ia64/include/asm/switch_to.h for an example. +the context switch. To request the scheduler call switch_to with the runqueue unlocked, you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file @@ -68,7 +68,5 @@ Possible arch/ problems Possible arch problems I found (and either tried to fix or didn't): -ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a) - sparc - IRQs on at this point(?), change local_irq_save to _disable. - TODO: needs secondary CPUs to disable preempt (See #1) diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst index e2c1cf7431..de414b33dd 100644 --- a/Documentation/scheduler/sched-capacity.rst +++ b/Documentation/scheduler/sched-capacity.rst @@ -39,14 +39,15 @@ per Hz, leading to:: ------------------- Two different capacity values are used within the scheduler. A CPU's -``capacity_orig`` is its maximum attainable capacity, i.e. its maximum -attainable performance level. A CPU's ``capacity`` is its ``capacity_orig`` to -which some loss of available performance (e.g. time spent handling IRQs) is -subtracted. +``original capacity`` is its maximum attainable capacity, i.e. its maximum +attainable performance level. This original capacity is returned by +the function arch_scale_cpu_capacity(). A CPU's ``capacity`` is its ``original +capacity`` to which some loss of available performance (e.g. time spent +handling IRQs) is subtracted. 
Note that a CPU's ``capacity`` is solely intended to be used by the CFS class, -while ``capacity_orig`` is class-agnostic. The rest of this document will use -the term ``capacity`` interchangeably with ``capacity_orig`` for the sake of +while ``original capacity`` is class-agnostic. The rest of this document will use +the term ``capacity`` interchangeably with ``original capacity`` for the sake of brevity. 1.3 Platform examples diff --git a/Documentation/scheduler/sched-energy.rst b/Documentation/scheduler/sched-energy.rst index fc853c8cc3..70e2921ef7 100644 --- a/Documentation/scheduler/sched-energy.rst +++ b/Documentation/scheduler/sched-energy.rst @@ -359,32 +359,9 @@ in milli-Watts or in an 'abstract scale'. 6.3 - Energy Model complexity ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The task wake-up path is very latency-sensitive. When the EM of a platform is -too complex (too many CPUs, too many performance domains, too many performance -states, ...), the cost of using it in the wake-up path can become prohibitive. -The energy-aware wake-up algorithm has a complexity of: - - C = Nd * (Nc + Ns) - -with: Nd the number of performance domains; Nc the number of CPUs; and Ns the -total number of OPPs (ex: for two perf. domains with 4 OPPs each, Ns = 8). - -A complexity check is performed at the root domain level, when scheduling -domains are built. EAS will not start on a root domain if its C happens to be -higher than the completely arbitrary EM_MAX_COMPLEXITY threshold (2048 at the -time of writing). - -If you really want to use EAS but the complexity of your platform's Energy -Model is too high to be used with a single root domain, you're left with only -two possible options: - - 1. split your system into separate, smaller, root domains using exclusive - cpusets and enable EAS locally on each of them. This option has the - benefit to work out of the box but the drawback of preventing load - balance between root domains, which can result in an unbalanced system - overall; - 2. 
submit patches to reduce the complexity of the EAS wake-up algorithm, - hence enabling it to cope with larger EMs in reasonable time. +EAS does not impose any complexity limit on the number of PDs/OPPs/CPUs but +restricts the number of CPUs to EM_MAX_NUM_CPUS to prevent overflows during +the energy estimation. 6.4 - Schedutil governor diff --git a/Documentation/scheduler/sched-rt-group.rst b/Documentation/scheduler/sched-rt-group.rst index 655a096ec8..d685609ed3 100644 --- a/Documentation/scheduler/sched-rt-group.rst +++ b/Documentation/scheduler/sched-rt-group.rst @@ -39,10 +39,10 @@ Most notable: 1.1 The problem --------------- -Realtime scheduling is all about determinism, a group has to be able to rely on +Real-time scheduling is all about determinism, a group has to be able to rely on the amount of bandwidth (eg. CPU time) being constant. In order to schedule -multiple groups of realtime tasks, each group must be assigned a fixed portion -of the CPU time available. Without a minimum guarantee a realtime group can +multiple groups of real-time tasks, each group must be assigned a fixed portion +of the CPU time available. Without a minimum guarantee a real-time group can obviously fall short. A fuzzy upper limit is of no use since it cannot be relied upon. Which leaves us with just the single fixed portion. @@ -50,14 +50,14 @@ relied upon. Which leaves us with just the single fixed portion. ---------------- CPU time is divided by means of specifying how much time can be spent running -in a given period. We allocate this "run time" for each realtime group which -the other realtime groups will not be permitted to use. +in a given period. We allocate this "run time" for each real-time group which +the other real-time groups will not be permitted to use. -Any time not allocated to a realtime group will be used to run normal priority +Any time not allocated to a real-time group will be used to run normal priority tasks (SCHED_OTHER). 
Any allocated run time not used will also be picked up by SCHED_OTHER. -Let's consider an example: a frame fixed realtime renderer must deliver 25 +Let's consider an example: a frame fixed real-time renderer must deliver 25 frames a second, which yields a period of 0.04s per frame. Now say it will also have to play some music and respond to input, leaving it with around 80% CPU time dedicated for the graphics. We can then give this group a run time of 0.8 @@ -70,7 +70,7 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s = of 0.00015s. The remaining CPU time will be used for user input and other tasks. Because -realtime tasks have explicitly allocated the CPU time they need to perform +real-time tasks have explicitly allocated the CPU time they need to perform their tasks, buffer underruns in the graphics or audio can be eliminated. NOTE: the above example is not fully implemented yet. We still @@ -87,18 +87,20 @@ lack an EDF scheduler to make non-uniform periods usable. The system wide settings are configured under the /proc virtual file system: /proc/sys/kernel/sched_rt_period_us: - The scheduling period that is equivalent to 100% CPU bandwidth + The scheduling period that is equivalent to 100% CPU bandwidth. /proc/sys/kernel/sched_rt_runtime_us: - A global limit on how much time realtime scheduling may use. Even without - CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime - processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth - available to all realtime groups. + A global limit on how much time real-time scheduling may use. This is always + less or equal to the period_us, as it denotes the time allocated from the + period_us for the real-time tasks. Even without CONFIG_RT_GROUP_SCHED enabled, + this will limit time reserved to real-time processes. With + CONFIG_RT_GROUP_SCHED=y it signifies the total bandwidth available to all + real-time groups. * Time is specified in us because the interface is s32. 
This gives an operating range from 1us to about 35 minutes. * sched_rt_period_us takes values from 1 to INT_MAX. - * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1). + * sched_rt_runtime_us takes values from -1 to sched_rt_period_us. * A run time of -1 specifies runtime == period, ie. no limit. @@ -108,7 +110,7 @@ The system wide settings are configured under the /proc virtual file system: The default values for sched_rt_period_us (1000000 or 1s) and sched_rt_runtime_us (950000 or 0.95s). This gives 0.05s to be used by SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away -realtime tasks will not lock up the machine but leave a little time to recover +real-time tasks will not lock up the machine but leave a little time to recover it. By setting runtime to -1 you'd get the old behaviour back. By default all bandwidth is assigned to the root group and new groups get the @@ -116,10 +118,10 @@ period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you want to assign bandwidth to another group, reduce the root group's bandwidth and assign some or all of the difference to another group. -Realtime group scheduling means you have to assign a portion of total CPU -bandwidth to the group before it will accept realtime tasks. Therefore you will -not be able to run realtime tasks as any user other than root until you have -done that, even if the user has the rights to run processes with realtime +Real-time group scheduling means you have to assign a portion of total CPU +bandwidth to the group before it will accept real-time tasks. Therefore you will +not be able to run real-time tasks as any user other than root until you have +done that, even if the user has the rights to run processes with real-time priority! 
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst index 6ed8d2fa6f..59f8fc106c 100644 --- a/Documentation/security/index.rst +++ b/Documentation/security/index.rst @@ -6,6 +6,7 @@ Security Documentation :maxdepth: 1 credentials + snp-tdx-threat-model IMA-templates keys/index lsm diff --git a/Documentation/security/snp-tdx-threat-model.rst b/Documentation/security/snp-tdx-threat-model.rst new file mode 100644 index 0000000000..ec66f2ed80 --- /dev/null +++ b/Documentation/security/snp-tdx-threat-model.rst @@ -0,0 +1,253 @@ +====================================================== +Confidential Computing in Linux for x86 virtualization +====================================================== + +.. contents:: :local: + +By: Elena Reshetova and Carlos Bilbao + +Motivation +========== + +Kernel developers working on confidential computing for virtualized +environments in x86 operate under a set of assumptions regarding the Linux +kernel threat model that differ from the traditional view. Historically, +the Linux threat model acknowledges attackers residing in userspace, as +well as a limited set of external attackers that are able to interact with +the kernel through various networking or limited HW-specific exposed +interfaces (USB, thunderbolt). The goal of this document is to explain +additional attack vectors that arise in the confidential computing space +and discuss the proposed protection mechanisms for the Linux kernel. + +Overview and terminology +======================== + +Confidential Computing (CoCo) is a broad term covering a wide range of +security technologies that aim to protect the confidentiality and integrity +of data in use (vs. data at rest or data in transit). At its core, CoCo +solutions provide a Trusted Execution Environment (TEE), where secure data +processing can be performed and, as a result, they are typically further +classified into different subtypes depending on the SW that is intended +to be run in TEE. 
This document focuses on a subclass of CoCo technologies +that are targeting virtualized environments and allow running Virtual +Machines (VM) inside TEE. From now on in this document will be referring +to this subclass of CoCo as 'Confidential Computing (CoCo) for the +virtualized environments (VE)'. + +CoCo, in the virtualization context, refers to a set of HW and/or SW +technologies that allow for stronger security guarantees for the SW running +inside a CoCo VM. Namely, confidential computing allows its users to +confirm the trustworthiness of all SW pieces to include in its reduced +Trusted Computing Base (TCB) given its ability to attest the state of these +trusted components. + +While the concrete implementation details differ between technologies, all +available mechanisms aim to provide increased confidentiality and +integrity for the VM's guest memory and execution state (vCPU registers), +more tightly controlled guest interrupt injection, as well as some +additional mechanisms to control guest-host page mapping. More details on +the x86-specific solutions can be found in +:doc:`Intel Trust Domain Extensions (TDX) ` and +`AMD Memory Encryption `_. + +The basic CoCo guest layout includes the host, guest, the interfaces that +communicate guest and host, a platform capable of supporting CoCo VMs, and +a trusted intermediary between the guest VM and the underlying platform +that acts as a security manager. The host-side virtual machine monitor +(VMM) typically consists of a subset of traditional VMM features and +is still in charge of the guest lifecycle, i.e. create or destroy a CoCo +VM, manage its access to system resources, etc. However, since it +typically stays out of CoCo VM TCB, its access is limited to preserve the +security objectives. 
+ +In the following diagram, the "<--->" lines represent bi-directional +communication channels or interfaces between the CoCo security manager and +the rest of the components (data flow for guest, host, hardware) :: + + +-------------------+ +-----------------------+ + | CoCo guest VM |<---->| | + +-------------------+ | | + | Interfaces | | CoCo security manager | + +-------------------+ | | + | Host VMM |<---->| | + +-------------------+ | | + | | + +--------------------+ | | + | CoCo platform |<--->| | + +--------------------+ +-----------------------+ + +The specific details of the CoCo security manager vastly diverge between +technologies. For example, in some cases, it will be implemented in HW +while in others it may be pure SW. + +Existing Linux kernel threat model +================================== + +The overall components of the current Linux kernel threat model are:: + + +-----------------------+ +-------------------+ + | |<---->| Userspace | + | | +-------------------+ + | External attack | | Interfaces | + | vectors | +-------------------+ + | |<---->| Linux Kernel | + | | +-------------------+ + +-----------------------+ +-------------------+ + | Bootloader/BIOS | + +-------------------+ + +-------------------+ + | HW platform | + +-------------------+ + +There is also communication between the bootloader and the kernel during +the boot process, but this diagram does not represent it explicitly. The +"Interfaces" box represents the various interfaces that allow +communication between kernel and userspace. This includes system calls, +kernel APIs, device drivers, etc. + +The existing Linux kernel threat model typically assumes execution on a +trusted HW platform with all of the firmware and bootloaders included on +its TCB. The primary attacker resides in the userspace, and all of the data +coming from there is generally considered untrusted, unless userspace is +privileged enough to perform trusted actions. 
In addition, external +attackers are typically considered, including those with access to enabled +external networks (e.g. Ethernet, Wireless, Bluetooth), exposed hardware +interfaces (e.g. USB, Thunderbolt), and the ability to modify the contents +of disks offline. + +Regarding external attack vectors, it is interesting to note that in most +cases external attackers will try to exploit vulnerabilities in userspace +first, but that it is possible for an attacker to directly target the +kernel; particularly if the host has physical access. Examples of direct +kernel attacks include the vulnerabilities CVE-2019-19524, CVE-2022-0435 +and CVE-2020-24490. + +Confidential Computing threat model and its security objectives +=============================================================== + +Confidential Computing adds a new type of attacker to the above list: a +potentially misbehaving host (which can also include some part of a +traditional VMM or all of it), which is typically placed outside of the +CoCo VM TCB due to its large SW attack surface. It is important to note +that this doesn’t imply that the host or VMM are intentionally +malicious, but that there exists a security value in having a small CoCo +VM TCB. 
This new type of adversary may be viewed as a more powerful type +of external attacker, as it resides locally on the same physical machine +(in contrast to a remote network attacker) and has control over the guest +kernel communication with most of the HW:: + + +------------------------+ + | CoCo guest VM | + +-----------------------+ | +-------------------+ | + | |<--->| | Userspace | | + | | | +-------------------+ | + | External attack | | | Interfaces | | + | vectors | | +-------------------+ | + | |<--->| | Linux Kernel | | + | | | +-------------------+ | + +-----------------------+ | +-------------------+ | + | | Bootloader/BIOS | | + +-----------------------+ | +-------------------+ | + | |<--->+------------------------+ + | | | Interfaces | + | | +------------------------+ + | CoCo security |<--->| Host/Host-side VMM | + | manager | +------------------------+ + | | +------------------------+ + | |<--->| CoCo platform | + +-----------------------+ +------------------------+ + +While traditionally the host has unlimited access to guest data and can +leverage this access to attack the guest, the CoCo systems mitigate such +attacks by adding security features like guest data confidentiality and +integrity protection. This threat model assumes that those features are +available and intact. + +The **Linux kernel CoCo VM security objectives** can be summarized as follows: + +1. Preserve the confidentiality and integrity of CoCo guest's private +memory and registers. + +2. Prevent privileged escalation from a host into a CoCo guest Linux kernel. +While it is true that the host (and host-side VMM) requires some level of +privilege to create, destroy, or pause the guest, part of the goal of +preventing privileged escalation is to ensure that these operations do not +provide a pathway for attackers to gain access to the guest's kernel. + +The above security objectives result in two primary **Linux kernel CoCo +VM assets**: + +1. Guest kernel execution context. +2. 
Guest kernel private memory. + +The host retains full control over the CoCo guest resources, and can deny +access to them at any time. Examples of resources include CPU time, memory +that the guest can consume, network bandwidth, etc. Because of this, the +host Denial of Service (DoS) attacks against CoCo guests are beyond the +scope of this threat model. + +The **Linux CoCo VM attack surface** is any interface exposed from a CoCo +guest Linux kernel towards an untrusted host that is not covered by the +CoCo technology SW/HW protection. This includes any possible +side-channels, as well as transient execution side channels. Examples of +explicit (not side-channel) interfaces include accesses to port I/O, MMIO +and DMA interfaces, access to PCI configuration space, VMM-specific +hypercalls (towards Host-side VMM), access to shared memory pages, +interrupts allowed to be injected into the guest kernel by the host, as +well as CoCo technology-specific hypercalls, if present. Additionally, the +host in a CoCo system typically controls the process of creating a CoCo +guest: it has a method to load into a guest the firmware and bootloader +images, the kernel image together with the kernel command line. All of this +data should also be considered untrusted until its integrity and +authenticity is established via attestation. + +The table below shows a threat matrix for the CoCo guest Linux kernel but +does not discuss potential mitigation strategies. The matrix refers to +CoCo-specific versions of the guest, host and platform. + +.. list-table:: CoCo Linux guest kernel threat matrix + :widths: auto + :align: center + :header-rows: 1 + + * - Threat name + - Threat description + + * - Guest malicious configuration + - A misbehaving host modifies one of the following guest's + configuration: + + 1. Guest firmware or bootloader + + 2. Guest kernel or module binaries + + 3. 
Guest command line parameters + + This allows the host to break the integrity of the code running + inside a CoCo guest, and violates the CoCo security objectives. + + * - CoCo guest data attacks + - A misbehaving host retains full control of the CoCo guest's data + in-transit between the guest and the host-managed physical or + virtual devices. This allows any attack against confidentiality, + integrity or freshness of such data. + + * - Malformed runtime input + - A misbehaving host injects malformed input via any communication + interface used by the guest's kernel code. If the code is not + prepared to handle this input correctly, this can result in a host + --> guest kernel privilege escalation. This includes traditional + side-channel and/or transient execution attack vectors. + + * - Malicious runtime input + - A misbehaving host injects a specific input value via any + communication interface used by the guest's kernel code. The + difference with the previous attack vector (malformed runtime input) + is that this input is not malformed, but its value is crafted to + impact the guest's kernel security. Examples of such inputs include + providing a malicious time to the guest or the entropy to the guest + random number generator. Additionally, the timing of such events can + be an attack vector on its own, if it results in a particular guest + kernel action (i.e. processing of a host-injected interrupt). + resistant to supplied host input. 
+ diff --git a/Documentation/sound/soc/codec-to-codec.rst b/Documentation/sound/soc/codec-to-codec.rst index 4eaa9a0c41..0418521b6e 100644 --- a/Documentation/sound/soc/codec-to-codec.rst +++ b/Documentation/sound/soc/codec-to-codec.rst @@ -70,7 +70,8 @@ file: .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .ignore_suspend = 1, - .params = &dsp_codec_params, + .c2c_params = &dsp_codec_params, + .num_c2c_params = 1, }, { .name = "DSP-CODEC", @@ -81,12 +82,13 @@ file: .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .ignore_suspend = 1, - .params = &dsp_codec_params, + .c2c_params = &dsp_codec_params, + .num_c2c_params = 1, }, Above code snippet is motivated from sound/soc/samsung/speyside.c. -Note the "params" callback which lets the dapm know that this +Note the "c2c_params" callback which lets the dapm know that this dai_link is a codec to codec connection. In dapm core a route is created between cpu_dai playback widget diff --git a/Documentation/sound/soc/dapm.rst b/Documentation/sound/soc/dapm.rst index 8e44107933..c3154ce6e1 100644 --- a/Documentation/sound/soc/dapm.rst +++ b/Documentation/sound/soc/dapm.rst @@ -234,7 +234,7 @@ corresponding soft power control. In this case it is necessary to create a virtual widget - a widget with no control bits e.g. :: - SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_DAPM_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), This can be used to merge to signal paths together in software. diff --git a/Documentation/sound/soc/dpcm.rst b/Documentation/sound/soc/dpcm.rst index 77f67ded53..2d7ad1d915 100644 --- a/Documentation/sound/soc/dpcm.rst +++ b/Documentation/sound/soc/dpcm.rst @@ -368,7 +368,8 @@ The machine driver sets some additional parameters to the DAI link i.e. 
.codec_name = "modem", .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, - .params = &dai_params, + .c2c_params = &dai_params, + .num_c2c_params = 1, } < ... more DAI links here ... > diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py index a99716bf44..4eb150bf50 100644 --- a/Documentation/sphinx/cdomain.py +++ b/Documentation/sphinx/cdomain.py @@ -93,7 +93,7 @@ def markup_ctype_refs(match): # RE_expr = re.compile(r':c:(expr|texpr):`([^\`]+)`') def markup_c_expr(match): - return '\ ``' + match.group(2) + '``\ ' + return '\\ ``' + match.group(2) + '``\\ ' # # Parse Sphinx 3.x C markups, replacing them by backward-compatible ones @@ -151,7 +151,7 @@ class CObject(Base_CObject): def handle_func_like_macro(self, sig, signode): u"""Handles signatures of function-like macros. - If the objtype is 'function' and the the signature ``sig`` is a + If the objtype is 'function' and the signature ``sig`` is a function-like macro, the name of the macro is returned. Otherwise ``False`` is returned. """ diff --git a/Documentation/sphinx/kernel_abi.py b/Documentation/sphinx/kernel_abi.py index 6d8a637ad5..5911bd0d79 100644 --- a/Documentation/sphinx/kernel_abi.py +++ b/Documentation/sphinx/kernel_abi.py @@ -102,7 +102,7 @@ class KernelCmd(Directive): code_block += "\n " + l lines = code_block + "\n\n" - line_regex = re.compile("^\.\. LINENO (\S+)\#([0-9]+)$") + line_regex = re.compile(r"^\.\. LINENO (\S+)\#([0-9]+)$") ln = 0 n = 0 f = fname diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index bdfaa3e4b2..03ace5f01b 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -95,7 +95,7 @@ class KernelFeat(Directive): lines = subprocess.check_output(args, cwd=os.path.dirname(doc.current_source)).decode('utf-8') - line_regex = re.compile("^\.\. FILE (\S+)$") + line_regex = re.compile(r"^\.\. 
FILE (\S+)$") out_lines = "" @@ -109,7 +109,7 @@ class KernelFeat(Directive): else: out_lines += line + "\n" - nodeList = self.nestedParse(out_lines, fname) + nodeList = self.nestedParse(out_lines, self.arguments[0]) return nodeList def nestedParse(self, lines, fname): diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index 9395892c7b..7acf09963d 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -130,7 +130,7 @@ class KernelDocDirective(Directive): result = ViewList() lineoffset = 0; - line_regex = re.compile("^\.\. LINENO ([0-9]+)$") + line_regex = re.compile(r"^\.\. LINENO ([0-9]+)$") for line in lines: match = line_regex.search(line) if match: @@ -138,7 +138,7 @@ class KernelDocDirective(Directive): lineoffset = int(match.group(1)) - 1 # we must eat our comments since the upset the markup else: - doc = env.srcdir + "/" + env.docname + ":" + str(self.lineno) + doc = str(env.srcdir) + "/" + env.docname + ":" + str(self.lineno) result.append(line, doc + ": " + filename, lineoffset) lineoffset += 1 diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index cefdbb7e75..13e885bbd4 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -309,7 +309,7 @@ def convert_image(img_node, translator, src_fname=None): if dst_fname: # the builder needs not to copy one more time, so pop it if exists. 
translator.builder.images.pop(img_node['uri'], None) - _name = dst_fname[len(translator.builder.outdir) + 1:] + _name = dst_fname[len(str(translator.builder.outdir)) + 1:] if isNewer(dst_fname, src_fname): kernellog.verbose(app, diff --git a/Documentation/sphinx/maintainers_include.py b/Documentation/sphinx/maintainers_include.py index 328b3631a5..dcad0fff47 100755 --- a/Documentation/sphinx/maintainers_include.py +++ b/Documentation/sphinx/maintainers_include.py @@ -77,7 +77,7 @@ class MaintainersInclude(Include): line = line.rstrip() # Linkify all non-wildcard refs to ReST files in Documentation/. - pat = '(Documentation/([^\s\?\*]*)\.rst)' + pat = r'(Documentation/([^\s\?\*]*)\.rst)' m = re.search(pat, line) if m: # maintainers.rst is in a subdirectory, so include "../". @@ -90,11 +90,11 @@ class MaintainersInclude(Include): output = "| %s" % (line.replace("\\", "\\\\")) # Look for and record field letter to field name mappings: # R: Designated *reviewer*: FullName - m = re.search("\s(\S):\s", line) + m = re.search(r"\s(\S):\s", line) if m: field_letter = m.group(1) if field_letter and not field_letter in fields: - m = re.search("\*([^\*]+)\*", line) + m = re.search(r"\*([^\*]+)\*", line) if m: fields[field_letter] = m.group(1) elif subsystems: @@ -112,7 +112,7 @@ class MaintainersInclude(Include): field_content = "" # Collapse whitespace in subsystem name. 
- heading = re.sub("\s+", " ", line) + heading = re.sub(r"\s+", " ", line) output = output + "%s\n%s" % (heading, "~" * len(heading)) field_prev = "" else: diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst index 90a0535a93..930dc23998 100644 --- a/Documentation/subsystem-apis.rst +++ b/Documentation/subsystem-apis.rst @@ -35,6 +35,7 @@ Human interfaces sound/index gpu/index fb/index + leds/index Networking interfaces --------------------- @@ -70,7 +71,6 @@ Storage interfaces fpga/index i2c/index iio/index - leds/index pcmcia/index spi/index w1/index diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst index 4a71ea6cb3..826e59a698 100644 --- a/Documentation/trace/coresight/coresight.rst +++ b/Documentation/trace/coresight/coresight.rst @@ -130,7 +130,7 @@ Misc: Device Tree Bindings -------------------- -See Documentation/devicetree/bindings/arm/arm,coresight-\*.yaml for details. +See ``Documentation/devicetree/bindings/arm/arm,coresight-*.yaml`` for details. As of this writing drivers for ITM, STMs and CTIs are not provided but are expected to be added as the solution matures. diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 8e9bebcf0a..e35e6b18df 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -59,8 +59,12 @@ Synopsis of fprobe-events and bitfield are supported. (\*1) This is available only when BTF is enabled. - (\*2) only for the probe on function entry (offs == 0). - (\*3) only for return probe. + (\*2) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*3) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. 
But this only + accesses one register. (\*4) this is useful for fetching a field of data structures. (\*5) "u" means user-space dereference. diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index fc7ce76eab..f825970a14 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -315,7 +315,6 @@ architectures: - i386 (Supports jump optimization) - x86_64 (AMD-64, EM64T) (Supports jump optimization) - ppc64 -- ia64 (Does not support probes on instruction slot1.) - sparc64 (Return probes not yet implemented.) - arm - ppc diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8a2dfee381..bf9cecb69f 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -61,8 +61,12 @@ Synopsis of kprobe_events (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. - (\*1) only for the probe on function entry (offs == 0). - (\*2) only for return probe. + (\*1) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only supports the arguments via registers. + (\*2) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*3) this is useful for fetching a field of data structures. (\*4) "u" means user-space dereference. See :ref:`user_mem_access`.
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index e24c009789..048dc0dbce 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -107,14 +107,14 @@ GetOptions( ); # Defaults for dynamically discovered regex's -my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flags=([A-Z_|]*)'; +my $regex_direct_begin_default = 'order=([0-9]*) gfp_flags=([A-Z_|]*)'; my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; -my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; -my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; +my $regex_wakeup_kswapd_default = 'nid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; +my $regex_lru_isolate_default = 'classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; -my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; +my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_taken=([0-9]*) nr_active=([0-9]*) nr_deactivated=([0-9]*) nr_referenced=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)' ; my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)'; # Dyanically discovered regex @@ -184,8 +184,7 @@ 
sub generate_traceevent_regex { $regex_direct_begin = generate_traceevent_regex( "vmscan/mm_vmscan_direct_reclaim_begin", $regex_direct_begin_default, - "order", "may_writepage", - "gfp_flags"); + "order", "gfp_flags"); $regex_direct_end = generate_traceevent_regex( "vmscan/mm_vmscan_direct_reclaim_end", $regex_direct_end_default, @@ -201,11 +200,11 @@ $regex_kswapd_sleep = generate_traceevent_regex( $regex_wakeup_kswapd = generate_traceevent_regex( "vmscan/mm_vmscan_wakeup_kswapd", $regex_wakeup_kswapd_default, - "nid", "zid", "order", "gfp_flags"); + "nid", "order", "gfp_flags"); $regex_lru_isolate = generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, - "isolate_mode", "classzone_idx", "order", + "classzone", "order", "nr_requested", "nr_scanned", "nr_skipped", "nr_taken", "lru"); $regex_lru_shrink_inactive = generate_traceevent_regex( @@ -218,11 +217,10 @@ $regex_lru_shrink_inactive = generate_traceevent_regex( $regex_lru_shrink_active = generate_traceevent_regex( "vmscan/mm_vmscan_lru_shrink_active", $regex_lru_shrink_active_default, - "nid", "zid", - "lru", - "nr_scanned", "nr_rotated", "priority"); + "nid", "nr_taken", "nr_active", "nr_deactivated", "nr_referenced", + "priority", "flags"); $regex_writepage = generate_traceevent_regex( - "vmscan/mm_vmscan_writepage", + "vmscan/mm_vmscan_write_folio", $regex_writepage_default, "page", "pfn", "flags"); @@ -371,7 +369,7 @@ EVENT_PROCESS: print " $regex_wakeup_kswapd\n"; next; } - my $order = $3; + my $order = $2; $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++; } elsif ($tracepoint eq "mm_vmscan_lru_isolate") { $details = $6; @@ -381,18 +379,14 @@ EVENT_PROCESS: print " $regex_lru_isolate/o\n"; next; } - my $isolate_mode = $1; - my $nr_scanned = $5; - my $file = $8; - - # To closer match vmstat scanning statistics, only count isolate_both - # and isolate_inactive as scanning. 
isolate_active is rotation - # isolate_inactive == 1 - # isolate_active == 2 - # isolate_both == 3 - if ($isolate_mode != 2) { + my $nr_scanned = $4; + my $lru = $7; + + # To closer match vmstat scanning statistics, only count + # inactive lru as scanning + if ($lru =~ /inactive_/) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; - if ($file =~ /_file/) { + if ($lru =~ /_file/) { $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned; } else { $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned; diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst index f9530d0ac5..d8f12442aa 100644 --- a/Documentation/trace/user_events.rst +++ b/Documentation/trace/user_events.rst @@ -14,6 +14,11 @@ Programs can view status of the events via /sys/kernel/tracing/user_events_status and can both register and write data out via /sys/kernel/tracing/user_events_data. +Programs can also use /sys/kernel/tracing/dynamic_events to register and +delete user based events via the u: prefix. The format of the command to +dynamic_events is the same as the ioctl with the u: prefix applied. This +requires CAP_PERFMON due to the event persisting, otherwise -EPERM is returned. + Typically programs will register a set of events that they wish to expose to tools that can read trace_events (such as ftrace and perf). The registration process tells the kernel which address and bit to reflect if any tool has @@ -45,7 +50,7 @@ This command takes a packed struct user_reg as an argument:: /* Input: Enable size in bytes at address */ __u8 enable_size; - /* Input: Flags for future use, set to 0 */ + /* Input: Flags to use, if any */ __u16 flags; /* Input: Address to update when enabled */ @@ -69,7 +74,7 @@ The struct user_reg requires all the above inputs to be set appropriately. This must be 4 (32-bit) or 8 (64-bit). 64-bit values are only allowed to be used on 64-bit kernels, however, 32-bit can be used on all kernels. 
-+ flags: The flags to use, if any. For the initial version this must be 0. ++ flags: The flags to use, if any. Callers should first attempt to use flags and retry without flags to ensure support for lower versions of the kernel. If a flag is not supported -EINVAL is returned. @@ -80,6 +85,13 @@ The struct user_reg requires all the above inputs to be set appropriately. + name_args: The name and arguments to describe the event, see command format for details. +The following flags are currently supported. + ++ USER_EVENT_REG_PERSIST: The event will not delete upon the last reference + closing. Callers may use this if an event should exist even after the + process closes or unregisters the event. Requires CAP_PERFMON otherwise + -EPERM is returned. + Upon successful registration the following is set. + write_index: The index to use for this file descriptor that represents this @@ -141,7 +153,10 @@ event (in both user and kernel space). User programs should use a separate file to request deletes than the one used for registration due to this. **NOTE:** By default events will auto-delete when there are no references left -to the event. Flags in the future may change this logic. +to the event. If programs do not want auto-delete, they must use the +USER_EVENT_REG_PERSIST flag when registering the event. Once that flag is used +the event exists until DIAG_IOCSDEL is invoked. Both register and delete of an +event that persists requires CAP_PERFMON, otherwise -EPERM is returned. Unregistering ------------- diff --git a/Documentation/translations/it_IT/riscv/patch-acceptance.rst b/Documentation/translations/it_IT/riscv/patch-acceptance.rst index edf67252b3..2d7afb1f69 100644 --- a/Documentation/translations/it_IT/riscv/patch-acceptance.rst +++ b/Documentation/translations/it_IT/riscv/patch-acceptance.rst @@ -1,6 +1,6 @@ .. 
include:: ../disclaimer-ita.rst -:Original: :doc:`../../../riscv/patch-acceptance` +:Original: :doc:`../../../arch/riscv/patch-acceptance` :Translator: Federico Vaga arch/riscv linee guida alla manutenzione per gli sviluppatori diff --git a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst new file mode 100644 index 0000000000..c261b428b3 --- /dev/null +++ b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst @@ -0,0 +1,341 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-sp.rst + +:Original: Documentation/process/embargoed-hardware-issues.rst +:Translator: Avadhut Naik + +Problemas de hardware embargados +================================ + +Alcance +------- + +Los problemas de hardware que resultan en problemas de seguridad son una +categoría diferente de errores de seguridad que los errores de software +puro que solo afectan al kernel de Linux. + +Los problemas de hardware como Meltdown, Spectre, L1TF, etc. deben +tratarse de manera diferente porque usualmente afectan a todos los +sistemas operativos (“OS”) y, por lo tanto, necesitan coordinación entre +vendedores diferentes de OS, distribuciones, vendedores de hardware y +otras partes. Para algunos de los problemas, las mitigaciones de software +pueden depender de actualizaciones de microcódigo o firmware, los cuales +necesitan una coordinación adicional. + +.. _Contacto: + +Contacto +-------- + +El equipo de seguridad de hardware del kernel de Linux es separado del +equipo regular de seguridad del kernel de Linux. + +El equipo solo maneja la coordinación de los problemas de seguridad de +hardware embargados. Los informes de errores de seguridad de software puro +en el kernel de Linux no son manejados por este equipo y el "reportero" +(quien informa del error) será guiado a contactar el equipo de seguridad +del kernel de Linux (:doc:`errores de seguridad `) en su +lugar. 
+ +El equipo puede ser contactado por correo electrónico en +hardware-security@kernel.org. Esta es una lista privada de oficiales de +seguridad que lo ayudarán a coordinar un problema de acuerdo con nuestro +proceso documentado. + +La lista está encriptada y el correo electrónico a la lista puede ser +enviado por PGP o S/MIME encriptado y debe estar firmado con la llave de +PGP del reportero o el certificado de S/MIME. La llave de PGP y el +certificado de S/MIME de la lista están disponibles en las siguientes +URLs: + + - PGP: https://www.kernel.org/static/files/hardware-security.asc + - S/MIME: https://www.kernel.org/static/files/hardware-security.crt + +Si bien los problemas de seguridad del hardware a menudo son manejados por +el vendedor de hardware afectado, damos la bienvenida al contacto de +investigadores o individuos que hayan identificado una posible falla de +hardware. + +Oficiales de seguridad de hardware +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +El equipo actual de oficiales de seguridad de hardware: + + - Linus Torvalds (Linux Foundation Fellow) + - Greg Kroah-Hartman (Linux Foundation Fellow) + - Thomas Gleixner (Linux Foundation Fellow) + +Operación de listas de correo +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Las listas de correo encriptadas que se utilizan en nuestro proceso están +alojadas en la infraestructura de IT de la Fundación Linux. Al proporcionar +este servicio, los miembros del personal de operaciones de IT de la +Fundación Linux técnicamente tienen la capacidad de acceder a la +información embargada, pero están obligados a la confidencialidad por su +contrato de trabajo. El personal de IT de la Fundación Linux también es +responsable de operar y administrar el resto de la infraestructura de +kernel.org. + +El actual director de infraestructura de proyecto de IT de la Fundación +Linux es Konstantin Ryabitsev.
+ +Acuerdos de no divulgación +-------------------------- + +El equipo de seguridad de hardware del kernel de Linux no es un organismo +formal y, por lo tanto, no puede firmar cualquier acuerdo de no +divulgación. La comunidad del kernel es consciente de la naturaleza +delicada de tales problemas y ofrece un Memorando de Entendimiento en su +lugar. + +Memorando de Entendimiento +-------------------------- + +La comunidad del kernel de Linux tiene una comprensión profunda del +requisito de mantener los problemas de seguridad de hardware bajo embargo +para la coordinación entre diferentes vendedores de OS, distribuidores, +vendedores de hardware y otras partes. + +La comunidad del kernel de Linux ha manejado con éxito los problemas de +seguridad del hardware en el pasado y tiene los mecanismos necesarios para +permitir el desarrollo compatible con la comunidad bajo restricciones de +embargo. + +La comunidad del kernel de Linux tiene un equipo de seguridad de hardware +dedicado para el contacto inicial, el cual supervisa el proceso de manejo +de tales problemas bajo las reglas de embargo. + +El equipo de seguridad de hardware identifica a los desarrolladores +(expertos en dominio) que formarán el equipo de respuesta inicial para un +problema en particular. El equipo de respuesta inicial puede involucrar +más desarrolladores (expertos en dominio) para abordar el problema de la +mejor manera técnica. + +Todos los desarrolladores involucrados se comprometen a adherirse a las +reglas del embargo y a mantener confidencial la información recibida. La +violación de la promesa conducirá a la exclusión inmediata del problema +actual y la eliminación de todas las listas de correo relacionadas. +Además, el equipo de seguridad de hardware también excluirá al +delincuente de problemas futuros. El impacto de esta consecuencia es un +elemento de disuasión altamente efectivo en nuestra comunidad. 
En caso de +que ocurra una violación, el equipo de seguridad de hardware informará a +las partes involucradas inmediatamente. Si usted o alguien tiene +conocimiento de una posible violación, por favor, infórmelo inmediatamente +a los oficiales de seguridad de hardware. + +Proceso +^^^^^^^ + +Debido a la naturaleza distribuida globalmente del desarrollo del kernel +de Linux, las reuniones cara a cara hacen imposible abordar los +problemas de seguridad del hardware. Las conferencias telefónicas son +difíciles de coordinar debido a las zonas horarias y otros factores y +solo deben usarse cuando sea absolutamente necesario. El correo +electrónico encriptado ha demostrado ser el método de comunicación más +efectivo y seguro para estos tipos de problemas. + +Inicio de la divulgación +"""""""""""""""""""""""" + +La divulgación comienza contactado al equipo de seguridad de hardware del +kernel de Linux por correo electrónico. Este contacto inicial debe +contener una descripción del problema y una lista de cualquier hardware +afectado conocido. Si su organización fabrica o distribuye el hardware +afectado, le animamos a considerar también que otro hardware podría estar +afectado. + +El equipo de seguridad de hardware proporcionará una lista de correo +encriptada específica para el incidente que se utilizará para la discusión +inicial con el reportero, la divulgación adicional y la coordinación. + +El equipo de seguridad de hardware proporcionará a la parte reveladora una +lista de desarrolladores (expertos de dominios) a quienes se debe informar +inicialmente sobre el problema después de confirmar con los +desarrolladores que se adherirán a este Memorando de Entendimiento y al +proceso documentado. Estos desarrolladores forman el equipo de respuesta +inicial y serán responsables de manejar el problema después del contacto +inicial. 
El equipo de seguridad de hardware apoyará al equipo de +respuesta, pero no necesariamente involucrándose en el proceso de desarrollo +de mitigación. + +Si bien los desarrolladores individuales pueden estar cubiertos por un +acuerdo de no divulgación a través de su empleador, no pueden firmar +acuerdos individuales de no divulgación en su papel de desarrolladores +del kernel de Linux. Sin embargo, aceptarán adherirse a este proceso +documentado y al Memorando de Entendimiento. + +La parte reveladora debe proporcionar una lista de contactos para todas +las demás entidades que ya han sido, o deberían ser, informadas sobre el +problema. Esto sirve para varios propósitos: + + - La lista de entidades divulgadas permite la comunicación en toda la + industria, por ejemplo, otros vendedores de OS, vendedores de HW, etc. + + - Las entidades divulgadas pueden ser contactadas para nombrar a expertos + que deben participar en el desarrollo de la mitigación. + + - Si un experto que se requiere para manejar un problema es empleado por + una entidad cotizada o un miembro de una entidad cotizada, los equipos + de respuesta pueden solicitar la divulgación de ese experto a esa + entidad. Esto asegura que el experto también forme parte del equipo de + respuesta de la entidad. + +Divulgación +""""""""""" + +La parte reveladora proporcionará información detallada al equipo de +respuesta inicial a través de la lista de correo encriptada específica. + +Según nuestra experiencia, la documentación técnica de estos problemas +suele ser un punto de partida suficiente y es mejor hacer aclaraciones +técnicas adicionales a través del correo electrónico. + +Desarrollo de la mitigación +""""""""""""""""""""""""""" + +El equipo de respuesta inicial configura una lista de correo encriptada o +reutiliza una existente si es apropiada.
+ +El uso de una lista de correo está cerca del proceso normal de desarrollo +de Linux y se ha utilizado con éxito en el desarrollo de mitigación para +varios problemas de seguridad de hardware en el pasado. + +La lista de correo funciona en la misma manera que el desarrollo normal de +Linux. Los parches se publican, discuten y revisan y, si se acuerda, se +aplican a un repositorio git no público al que solo pueden acceder los +desarrolladores participantes a través de una conexión segura. El +repositorio contiene la rama principal de desarrollo en comparación con +el kernel principal y las ramas backport para versiones estables del +kernel según sea necesario. + +El equipo de respuesta inicial identificará a más expertos de la +comunidad de desarrolladores del kernel de Linux según sea necesario. La +incorporación de expertos puede ocurrir en cualquier momento del proceso +de desarrollo y debe manejarse de manera oportuna. + +Si un experto es empleado por o es miembro de una entidad en la lista de +divulgación proporcionada por la parte reveladora, entonces se solicitará +la participación de la entidad pertinente. + +Si no es así, entonces se informará a la parte reveladora sobre la +participación de los expertos. Los expertos están cubiertos por el +Memorando de Entendimiento y se solicita a la parte reveladora que +reconozca la participación. En caso de que la parte reveladora tenga una +razón convincente para objetar, entonces esta objeción debe plantearse +dentro de los cinco días laborables y resolverse con el equipo de +incidente inmediatamente. Si la parte reveladora no reacciona dentro de +los cinco días laborables, esto se toma como un reconocimiento silencioso. + +Después del reconocimiento o la resolución de una objeción, el experto es +revelado por el equipo de incidente y se incorpora al proceso de +desarrollo. + +Lanzamiento coordinado +"""""""""""""""""""""" + +Las partes involucradas negociarán la fecha y la hora en la que termina el +embargo. 
En ese momento, las mitigaciones preparadas se integran en los +árboles de kernel relevantes y se publican. + +Si bien entendemos que los problemas de seguridad del hardware requieren +un tiempo de embargo coordinado, el tiempo de embargo debe limitarse al +tiempo mínimo que se requiere para que todas las partes involucradas +desarrollen, prueben y preparen las mitigaciones. Extender el tiempo de +embargo artificialmente para cumplir con las fechas de discusión de la +conferencia u otras razones no técnicas está creando más trabajo y carga +para los desarrolladores y los equipos de respuesta involucrados, ya que +los parches necesitan mantenerse actualizados para seguir el desarrollo en +curso del kernel upstream, lo cual podría crear cambios conflictivos. + +Asignación de CVE +""""""""""""""""" + +Ni el equipo de seguridad de hardware ni el equipo de respuesta inicial +asignan CVEs, ni se requieren para el proceso de desarrollo. Si los CVEs +son proporcionados por la parte reveladora, pueden usarse con fines de +documentación. + +Embajadores del proceso +----------------------- + +Para obtener asistencia con este proceso, hemos establecido embajadores +en varias organizaciones, que pueden responder preguntas o proporcionar +orientación sobre el proceso de reporte y el manejo posterior. Los +embajadores no están involucrados en la divulgación de un problema en +particular, a menos que lo solicite un equipo de respuesta o una parte +revelada involucrada. 
La lista de embajadores actuales: + + ============= ======================================================== + AMD Tom Lendacky + Ampere Darren Hart + ARM Catalin Marinas + IBM Power Anton Blanchard + IBM Z Christian Borntraeger + Intel Tony Luck + Qualcomm Trilok Soni + Samsung Javier González + + Microsoft James Morris + Xen Andrew Cooper + + Canonical John Johansen + Debian Ben Hutchings + Oracle Konrad Rzeszutek Wilk + Red Hat Josh Poimboeuf + SUSE Jiri Kosina + + Google Kees Cook + + LLVM Nick Desaulniers + ============= ======================================================== + +Si quiere que su organización se añada a la lista de embajadores, por +favor póngase en contacto con el equipo de seguridad de hardware. El +embajador nominado tiene que entender y apoyar nuestro proceso +completamente y está idealmente bien conectado en la comunidad del kernel +de Linux. + +Listas de correo encriptadas +---------------------------- + +Usamos listas de correo encriptadas para la comunicación. El principio de +funcionamiento de estas listas es que el correo electrónico enviado a la +lista se encripta con la llave PGP de la lista o con el certificado S/MIME +de la lista. El software de lista de correo descifra el correo electrónico +y lo vuelve a encriptar individualmente para cada suscriptor con la llave +PGP del suscriptor o el certificado S/MIME. Los detalles sobre el software +de la lista de correo y la configuración que se usa para asegurar la +seguridad de las listas y la protección de los datos se pueden encontrar +aquí: https://korg.wiki.kernel.org/userdoc/remail. + +Llaves de lista +^^^^^^^^^^^^^^^ + +Para el contacto inicial, consulte :ref:`Contacto`. Para las listas de +correo específicas de incidentes, la llave y el certificado S/MIME se +envían a los suscriptores por correo electrónico desde la lista +específica.
+ +Suscripción a listas específicas de incidentes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +La suscripción es manejada por los equipos de respuesta. Las partes +reveladas que quieren participar en la comunicación envían una lista de +suscriptores potenciales al equipo de respuesta para que el equipo de +respuesta pueda validar las solicitudes de suscripción. + +Cada suscriptor necesita enviar una solicitud de suscripción al equipo de +respuesta por correo electrónico. El correo electrónico debe estar firmado +con la llave PGP del suscriptor o el certificado S/MIME. Si se usa una +llave PGP, debe estar disponible desde un servidor de llave pública y está +idealmente conectada a la red de confianza PGP del kernel de Linux. Véase +también: https://www.kernel.org/signature.html. + +El equipo de respuesta verifica que la solicitud del suscriptor sea válida +y añade al suscriptor a la lista. Después de la suscripción, el suscriptor +recibirá un correo electrónico de la lista que está firmado con la llave +PGP de la lista o el certificado S/MIME de la lista. El cliente de correo +electrónico del suscriptor puede extraer la llave PGP o el certificado +S/MIME de la firma, de modo que el suscriptor pueda enviar correo +electrónico encriptado a la lista. diff --git a/Documentation/translations/sp_SP/process/index.rst b/Documentation/translations/sp_SP/process/index.rst index 09bfece0f5..d6f3ccfb16 100644 --- a/Documentation/translations/sp_SP/process/index.rst +++ b/Documentation/translations/sp_SP/process/index.rst @@ -22,3 +22,5 @@ adding-syscalls researcher-guidelines contribution-maturity-model + security-bugs + embargoed-hardware-issues diff --git a/Documentation/translations/sp_SP/process/security-bugs.rst b/Documentation/translations/sp_SP/process/security-bugs.rst new file mode 100644 index 0000000000..d07c7e579b --- /dev/null +++ b/Documentation/translations/sp_SP/process/security-bugs.rst @@ -0,0 +1,103 @@ +.. SPDX-License-Identifier: GPL-2.0 +..
include:: ../disclaimer-sp.rst + +:Original: Documentation/process/security-bugs.rst +:Translator: Avadhut Naik + +Errores de seguridad +==================== + +Los desarrolladores del kernel de Linux se toman la seguridad muy en +serio. Como tal, nos gustaría saber cuándo se encuentra un error de +seguridad para que pueda ser corregido y divulgado lo más rápido posible. +Por favor, informe sobre los errores de seguridad al equipo de seguridad +del kernel de Linux. + +Contacto +-------- + +El equipo de seguridad del kernel de Linux puede ser contactado por correo +electrónico en security@kernel.org. Esta es una lista privada de +oficiales de seguridad que ayudarán a verificar el informe del error y +desarrollarán y publicarán una corrección. Si ya tiene una corrección, por +favor, inclúyala con su informe, ya que eso puede acelerar considerablemente +el proceso. Es posible que el equipo de seguridad traiga ayuda adicional +de mantenedores del área para comprender y corregir la vulnerabilidad de +seguridad. + +Como ocurre con cualquier error, cuanta más información se proporcione, +más fácil será diagnosticarlo y corregirlo. Por favor, revise el +procedimiento descrito en 'Documentation/admin-guide/reporting-issues.rst' +si no tiene claro qué información es útil. Cualquier código de explotación +es muy útil y no será divulgado sin el consentimiento del "reportero" (el +que envía el error) a menos que ya se haya hecho público. + +Por favor, envíe correos electrónicos en texto plano sin archivos +adjuntos cuando sea posible. Es mucho más difícil tener una discusión +citada en contexto sobre un tema complejo si todos los detalles están +ocultos en archivos adjuntos. Piense en ello como un +:doc:`envío de parche regular ` (incluso si no tiene +un parche todavía) describa el problema y el impacto, enumere los pasos +de reproducción, y sígalo con una solución propuesta, todo en texto plano.
+ + +Divulgación e información embargada +----------------------------------- + +La lista de seguridad no es un canal de divulgación. Para eso, ver +Coordinación debajo. Una vez que se ha desarrollado una solución robusta, +comienza el proceso de lanzamiento. Las soluciones para errores conocidos +públicamente se lanzan inmediatamente. + +Aunque nuestra preferencia es lanzar soluciones para errores no divulgados +públicamente tan pronto como estén disponibles, esto puede posponerse a +petición del reportero o una parte afectada por hasta 7 días calendario +desde el inicio del proceso de lanzamiento, con una extensión excepcional +a 14 días de calendario si se acuerda que la criticidad del error requiere +más tiempo. La única razón válida para aplazar la publicación de una +solución es para acomodar la logística de QA y los despliegues a gran +escala que requieren coordinación de lanzamiento. + +Si bien la información embargada puede compartirse con personas de +confianza para desarrollar una solución, dicha información no se publicará +junto con la solución o en cualquier otro canal de divulgación sin el +permiso del reportero. Esto incluye, pero no se limita al informe original +del error y las discusiones de seguimiento (si las hay), exploits, +información sobre CVE o la identidad del reportero. + +En otras palabras, nuestro único interés es solucionar los errores. Toda +otra información presentada a la lista de seguridad y cualquier discusión +de seguimiento del informe se tratan confidencialmente incluso después de +que se haya levantado el embargo, en perpetuidad. + +Coordinación con otros grupos +----------------------------- + +El equipo de seguridad del kernel recomienda encarecidamente que los +reporteros de posibles problemas de seguridad NUNCA contacten la lista +de correo “linux-distros” hasta DESPUÉS de discutirlo con el equipo de +seguridad del kernel. No Cc: ambas listas a la vez.
Puede ponerse en +contacto con la lista de correo linux-distros después de que se haya +acordado una solución y comprenda completamente los requisitos que al +hacerlo le impondrá a usted y la comunidad del kernel. + +Las diferentes listas tienen diferentes objetivos y las reglas de +linux-distros no contribuyen en realidad a solucionar ningún problema de +seguridad potencial. + +Asignación de CVE +----------------- + +El equipo de seguridad no asigna CVEs, ni los requerimos para informes o +correcciones, ya que esto puede complicar innecesariamente el proceso y +puede retrasar el manejo de errores. Si un reportero desea que se le +asigne un identificador CVE, debe buscar uno por sí mismo, por ejemplo, +poniéndose en contacto directamente con MITRE. Sin embargo, en ningún +caso se retrasará la inclusión de un parche para esperar a que llegue un +identificador CVE. + +Acuerdos de no divulgación +-------------------------- + +El equipo de seguridad del kernel de Linux no es un organismo formal y, +por lo tanto, no puede firmar cualquier acuerdo de no divulgación. 
diff --git a/Documentation/translations/zh_CN/arch/index.rst b/Documentation/translations/zh_CN/arch/index.rst index e3d273d7d5..71186d9df7 100644 --- a/Documentation/translations/zh_CN/arch/index.rst +++ b/Documentation/translations/zh_CN/arch/index.rst @@ -10,7 +10,7 @@ mips/index arm64/index - ../riscv/index + ../arch/riscv/index openrisc/index parisc/index loongarch/index @@ -18,7 +18,6 @@ TODOList: * arm/index -* ia64/index * m68k/index * nios2/index * powerpc/index diff --git a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst index 59d6bf3305..bf463c5a4c 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst @@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站(软件与文档资源): LoongArch指令集架构的文档: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (中文版) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (英文版) LoongArch的ELF psABI文档: diff --git a/Documentation/translations/zh_CN/arch/riscv/boot-image-header.rst b/Documentation/translations/zh_CN/arch/riscv/boot-image-header.rst new file mode 100644 index 0000000000..779b5172fe --- /dev/null +++ b/Documentation/translations/zh_CN/arch/riscv/boot-image-header.rst @@ -0,0 +1,69 @@ +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/arch/riscv/boot-image-header.rst + +:翻译: + + 司延腾 Yanteng Si + +.. 
_cn_boot-image-header.rst: + +========================== +RISC-V Linux启动镜像文件头 +========================== + +:Author: Atish Patra +:Date: 20 May 2019 + +此文档仅描述RISC-V Linux 启动文件头的详情。 + +TODO: + 写一个完整的启动指南。 + +在解压后的Linux内核镜像中存在以下64字节的文件头:: + + u32 code0; /* Executable code */ + u32 code1; /* Executable code */ + u64 text_offset; /* Image load offset, little endian */ + u64 image_size; /* Effective Image size, little endian */ + u64 flags; /* kernel flags, little endian */ + u32 version; /* Version of this header */ + u32 res1 = 0; /* Reserved */ + u64 res2 = 0; /* Reserved */ + u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ + u32 magic2 = 0x05435352; /* Magic number 2, little endian, "RSC\x05" */ + u32 res3; /* Reserved for PE COFF offset */ + +这种头格式与PE/COFF文件头兼容,并在很大程度上受到ARM64文件头的启发。因此,ARM64 +和RISC-V文件头可以在未来合并为一个共同的头。 + +注意 +==== + +- 将来也可以复用这个文件头,用来对RISC-V的EFI桩提供支持。为了使内核镜像如同一个 + EFI应用程序一样加载,EFI规范中规定在内核镜像的开始需要PE/COFF镜像文件头。为了 + 支持EFI桩,应该用“MZ”魔术字符替换掉code0,并且res3(偏移量为0x3c)应指向PE/COFF + 文件头的其余部分。 + +- 表示文件头版本号的Drop-bit位域 + + ========== ========== + Bits 0:15 次要 版本 + Bits 16:31 主要 版本 + ========== ========== + + 这保持了新旧版本之间的兼容性。 + 当前版本被定义为0.2。 + +- 从版本0.2开始,结构体成员“magic”就已经被弃用,在之后的版本中,可能会移除掉它。 + 最初,该成员应该与ARM64头的“magic”成员匹配,但遗憾的是并没有。 + “magic2”成员代替“magic”成员与ARM64头相匹配。 + +- 在当前的文件头,标志位域只剩下了一个位。 + + ===== ============================== + Bit 0 内核字节序。1 if BE, 0 if LE. + ===== ============================== + +- 对于引导加载程序加载内核映像来说,image_size成员对引导加载程序而言是必须的,否 + 则将引导失败。 diff --git a/Documentation/translations/zh_CN/arch/riscv/index.rst b/Documentation/translations/zh_CN/arch/riscv/index.rst new file mode 100644 index 0000000000..3b041c1161 --- /dev/null +++ b/Documentation/translations/zh_CN/arch/riscv/index.rst @@ -0,0 +1,30 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/arch/riscv/index.rst + +:翻译: + + 司延腾 Yanteng Si + +.. _cn_riscv_index: + +=============== +RISC-V 体系结构 +=============== + +.. 
toctree:: + :maxdepth: 1 + + boot-image-header + vm-layout + patch-acceptance + + +.. only:: subproject and html + + 目录 + ==== + + * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/arch/riscv/patch-acceptance.rst b/Documentation/translations/zh_CN/arch/riscv/patch-acceptance.rst new file mode 100644 index 0000000000..c8eb230ca8 --- /dev/null +++ b/Documentation/translations/zh_CN/arch/riscv/patch-acceptance.rst @@ -0,0 +1,33 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/arch/riscv/patch-acceptance.rst + +:翻译: + + 司延腾 Yanteng Si + +.. _cn_riscv_patch-acceptance: + +arch/riscv 开发者维护指南 +========================= + +概述 +---- +RISC-V指令集体系结构是公开开发的: +正在进行的草案可供所有人查看和测试实现。新模块或者扩展草案可能会在开发过程中发 +生更改---有时以不兼容的方式对以前的草案进行更改。这种灵活性可能会给RISC-V Linux +维护者带来挑战。Linux开发过程更喜欢经过良好检查和测试的代码,而不是试验代码。我 +们希望推广同样的规则到即将被内核合并的RISC-V相关代码。 + +附加的提交检查单 +---------------- +我们仅接受相关标准已经被RISC-V基金会标注为“已批准”或“已冻结”的扩展或模块的补丁。 +(开发者当然可以维护自己的Linux内核树,其中包含所需代码扩展草案的代码。) + +此外,RISC-V规范允许爱好者创建自己的自定义扩展。这些自定义扩展不需要通过RISC-V +基金会的任何审核或批准。为了避免将爱好者一些特别的RISC-V扩展添加进内核代码带来 +的维护复杂性和对性能的潜在影响,我们将只接受RISC-V基金会正式冻结或批准的扩展 +补丁。(开发者当然可以维护自己的Linux内核树,其中包含他们想要的任何自定义扩展 +的代码。) diff --git a/Documentation/translations/zh_CN/arch/riscv/vm-layout.rst b/Documentation/translations/zh_CN/arch/riscv/vm-layout.rst new file mode 100644 index 0000000000..4b9f4dcf6c --- /dev/null +++ b/Documentation/translations/zh_CN/arch/riscv/vm-layout.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/arch/riscv/vm-layout.rst + +:翻译: + + 司延腾 Yanteng Si + Binbin Zhou + +============================ +RISC-V Linux上的虚拟内存布局 +============================ + +:作者: Alexandre Ghiti +:日期: 12 February 2021 + +这份文件描述了RISC-V Linux内核使用的虚拟内存布局。 + +32位 RISC-V Linux 内核 +====================== + +RISC-V Linux Kernel SV32 +------------------------ + +TODO + +64位 RISC-V Linux 内核 +====================== + +RISC-V特权架构文档指出,64位地址 "必须使第63-48位值都等于第47位,否则将发生缺页异常。":这将虚 +拟地址空间分成两半,中间有一个非常大的洞,下半部分是用户空间所在的地方,上半部分是RISC-V Linux +内核所在的地方。 + +RISC-V Linux Kernel SV39 +------------------------ + +:: + + ======================================================================================================================== + 开始地址 | 偏移 | 结束地址 | 大小 | 虚拟内存区域描述 + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 0000003fffffffff | 256 GB | 用户空间虚拟内存,每个内存管理器不同 + __________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... 
巨大的、几乎64位宽的直到内核映射的-256GB地方 + | | | | 开始偏移的非经典虚拟内存地址空洞。 + | | | | + __________________|____________|__________________|_________|___________________________________________________________ + | + | 内核空间的虚拟内存,在所有进程之间共享: + ____________________________________________________________|___________________________________________________________ + | | | | + ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space + ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | 直接映射所有物理内存 + fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ + + +RISC-V Linux Kernel SV48 +------------------------ + +:: + + ======================================================================================================================== + 开始地址 | 偏移 | 结束地址 | 大小 | 虚拟内存区域描述 + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 00007fffffffffff | 128 TB | 用户空间虚拟内存,每个内存管理器不同 + __________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... 
巨大的、几乎64位宽的直到内核映射的-128TB地方 + | | | | 开始偏移的非经典虚拟内存地址空洞。 + | | | | + __________________|____________|__________________|_________|___________________________________________________________ + | + | 内核空间的虚拟内存,在所有进程之间共享: + ____________________________________________________________|___________________________________________________________ + | | | | + ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap + ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io + ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap + ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space + ffffaf8000000000 | -80.5 TB | ffffef7fffffffff | 64 TB | 直接映射所有物理内存 + ffffef8000000000 | -16.5 TB | fffffffeffffffff | 16.5 TB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | 从此处开始,与39-bit布局相同: + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ diff --git a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst index 4772a900c3..bc0d7ea6d8 100644 --- a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst +++ b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst @@ -49,12 +49,6 @@ CPU热拔插支持的一个更新颖的用途是它在SMP的暂停恢复支持 限制内核将支持的CPU总量。如果这里提供的数量低于实际可用的CPU数量,那么其他CPU 以后就不能上线了。 -``additional_cpus=n`` - 使用它来限制可热插拔的CPU。该选项设置 - ``cpu_possible_mask = cpu_present_mask + additional_cpus`` - - 这个选项只限于IA64架构。 - ``possible_cpus=n`` 这个选项设置 ``cpu_possible_mask`` 中的 ``possible_cpus`` 位。 diff --git a/Documentation/translations/zh_CN/index.rst 
b/Documentation/translations/zh_CN/index.rst index 299704c081..6ccec9657c 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -52,12 +52,9 @@ core-api/index driver-api/index + subsystem-apis 内核中的锁 -TODOList: - -* subsystem-apis - 开发工具和流程 -------------- diff --git a/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst b/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst index a1ee99c478..0f5acfb101 100644 --- a/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst +++ b/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst @@ -89,4 +89,4 @@ ../doc-guide/maintainer-profile ../../../nvdimm/maintainer-entry-profile - ../../../riscv/patch-acceptance + ../../../arch/riscv/patch-acceptance diff --git a/Documentation/translations/zh_CN/riscv/boot-image-header.rst b/Documentation/translations/zh_CN/riscv/boot-image-header.rst deleted file mode 100644 index 0234c28a71..0000000000 --- a/Documentation/translations/zh_CN/riscv/boot-image-header.rst +++ /dev/null @@ -1,69 +0,0 @@ -.. include:: ../disclaimer-zh_CN.rst - -:Original: Documentation/riscv/boot-image-header.rst - -:翻译: - - 司延腾 Yanteng Si - -.. 
_cn_boot-image-header.rst: - -========================== -RISC-V Linux启动镜像文件头 -========================== - -:Author: Atish Patra -:Date: 20 May 2019 - -此文档仅描述RISC-V Linux 启动文件头的详情。 - -TODO: - 写一个完整的启动指南。 - -在解压后的Linux内核镜像中存在以下64字节的文件头:: - - u32 code0; /* Executable code */ - u32 code1; /* Executable code */ - u64 text_offset; /* Image load offset, little endian */ - u64 image_size; /* Effective Image size, little endian */ - u64 flags; /* kernel flags, little endian */ - u32 version; /* Version of this header */ - u32 res1 = 0; /* Reserved */ - u64 res2 = 0; /* Reserved */ - u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ - u32 magic2 = 0x05435352; /* Magic number 2, little endian, "RSC\x05" */ - u32 res3; /* Reserved for PE COFF offset */ - -这种头格式与PE/COFF文件头兼容,并在很大程度上受到ARM64文件头的启发。因此,ARM64 -和RISC-V文件头可以在未来合并为一个共同的头。 - -注意 -==== - -- 将来也可以复用这个文件头,用来对RISC-V的EFI桩提供支持。为了使内核镜像如同一个 - EFI应用程序一样加载,EFI规范中规定在内核镜像的开始需要PE/COFF镜像文件头。为了 - 支持EFI桩,应该用“MZ”魔术字符替换掉code0,并且res3(偏移量未0x3c)应指向PE/COFF - 文件头的其余部分. - -- 表示文件头版本号的Drop-bit位域 - - ========== ========== - Bits 0:15 次要 版本 - Bits 16:31 主要 版本 - ========== ========== - - 这保持了新旧版本之间的兼容性。 - 当前版本被定义为0.2。 - -- 从版本0.2开始,结构体成员“magic”就已经被弃用,在之后的版本中,可能会移除掉它。 - 最初,该成员应该与ARM64头的“magic”成员匹配,但遗憾的是并没有。 - “magic2”成员代替“magic”成员与ARM64头相匹配。 - -- 在当前的文件头,标志位域只剩下了一个位。 - - ===== ============================== - Bit 0 内核字节序。1 if BE, 0 if LE. - ===== ============================== - -- 对于引导加载程序加载内核映像来说,image_size成员对引导加载程序而言是必须的,否 - 则将引导失败。 diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst deleted file mode 100644 index 131e405aa8..0000000000 --- a/Documentation/translations/zh_CN/riscv/index.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. include:: ../disclaimer-zh_CN.rst - -:Original: Documentation/riscv/index.rst - -:翻译: - - 司延腾 Yanteng Si - -.. _cn_riscv_index: - -=============== -RISC-V 体系结构 -=============== - -.. 
toctree:: - :maxdepth: 1 - - boot-image-header - vm-layout - patch-acceptance - - -.. only:: subproject and html - - 目录 - ==== - - * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/riscv/patch-acceptance.rst b/Documentation/translations/zh_CN/riscv/patch-acceptance.rst deleted file mode 100644 index d180d24717..0000000000 --- a/Documentation/translations/zh_CN/riscv/patch-acceptance.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. include:: ../disclaimer-zh_CN.rst - -:Original: Documentation/riscv/patch-acceptance.rst - -:翻译: - - 司延腾 Yanteng Si - -.. _cn_riscv_patch-acceptance: - -arch/riscv 开发者维护指南 -========================= - -概述 ----- -RISC-V指令集体系结构是公开开发的: -正在进行的草案可供所有人查看和测试实现。新模块或者扩展草案可能会在开发过程中发 -生更改---有时以不兼容的方式对以前的草案进行更改。这种灵活性可能会给RISC-V Linux -维护者带来挑战。Linux开发过程更喜欢经过良好检查和测试的代码,而不是试验代码。我 -们希望推广同样的规则到即将被内核合并的RISC-V相关代码。 - -附加的提交检查单 ----------------- -我们仅接受相关标准已经被RISC-V基金会标准为“已批准”或“已冻结”的扩展或模块的补丁。 -(开发者当然可以维护自己的Linux内核树,其中包含所需代码扩展草案的代码。) - -此外,RISC-V规范允许爱好者创建自己的自定义扩展。这些自定义拓展不需要通过RISC-V -基金会的任何审核或批准。为了避免将爱好者一些特别的RISC-V拓展添加进内核代码带来 -的维护复杂性和对性能的潜在影响,我们将只接受RISC-V基金会正式冻结或批准的的扩展 -补丁。(开发者当然可以维护自己的Linux内核树,其中包含他们想要的任何自定义扩展 -的代码。) diff --git a/Documentation/translations/zh_CN/riscv/vm-layout.rst b/Documentation/translations/zh_CN/riscv/vm-layout.rst deleted file mode 100644 index 91884e2dff..0000000000 --- a/Documentation/translations/zh_CN/riscv/vm-layout.rst +++ /dev/null @@ -1,104 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. 
include:: ../disclaimer-zh_CN.rst - -:Original: Documentation/riscv/vm-layout.rst - -:翻译: - - 司延腾 Yanteng Si - Binbin Zhou - -============================ -RISC-V Linux上的虚拟内存布局 -============================ - -:作者: Alexandre Ghiti -:日期: 12 February 2021 - -这份文件描述了RISC-V Linux内核使用的虚拟内存布局。 - -32位 RISC-V Linux 内核 -====================== - -RISC-V Linux Kernel SV32 ------------------------- - -TODO - -64位 RISC-V Linux 内核 -====================== - -RISC-V特权架构文档指出,64位地址 "必须使第63-48位值都等于第47位,否则将发生缺页异常。":这将虚 -拟地址空间分成两半,中间有一个非常大的洞,下半部分是用户空间所在的地方,上半部分是RISC-V Linux -内核所在的地方。 - -RISC-V Linux Kernel SV39 ------------------------- - -:: - - ======================================================================================================================== - 开始地址 | 偏移 | 结束地址 | 大小 | 虚拟内存区域描述 - ======================================================================================================================== - | | | | - 0000000000000000 | 0 | 0000003fffffffff | 256 GB | 用户空间虚拟内存,每个内存管理器不同 - __________________|____________|__________________|_________|___________________________________________________________ - | | | | - 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... 
巨大的、几乎64位宽的直到内核映射的-256GB地方 - | | | | 开始偏移的非经典虚拟内存地址空洞。 - | | | | - __________________|____________|__________________|_________|___________________________________________________________ - | - | 内核空间的虚拟内存,在所有进程之间共享: - ____________________________________________________________|___________________________________________________________ - | | | | - ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap - ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io - ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap - ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space - ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | 直接映射所有物理内存 - fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan - __________________|____________|__________________|_________|____________________________________________________________ - | - | - ____________________________________________________________|____________________________________________________________ - | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel - __________________|____________|__________________|_________|____________________________________________________________ - - -RISC-V Linux Kernel SV48 ------------------------- - -:: - - ======================================================================================================================== - 开始地址 | 偏移 | 结束地址 | 大小 | 虚拟内存区域描述 - ======================================================================================================================== - | | | | - 0000000000000000 | 0 | 00007fffffffffff | 128 TB | 用户空间虚拟内存,每个内存管理器不同 - __________________|____________|__________________|_________|___________________________________________________________ - | | | | - 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... 
巨大的、几乎64位宽的直到内核映射的-128TB地方 - | | | | 开始偏移的非经典虚拟内存地址空洞。 - | | | | - __________________|____________|__________________|_________|___________________________________________________________ - | - | 内核空间的虚拟内存,在所有进程之间共享: - ____________________________________________________________|___________________________________________________________ - | | | | - ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap - ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io - ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap - ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space - ffffaf8000000000 | -80.5 TB | ffffef7fffffffff | 64 TB | 直接映射所有物理内存 - ffffef8000000000 | -16.5 TB | fffffffeffffffff | 16.5 TB | kasan - __________________|____________|__________________|_________|____________________________________________________________ - | - | 从此处开始,与39-bit布局相同: - ____________________________________________________________|____________________________________________________________ - | | | | - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel - __________________|____________|__________________|_________|____________________________________________________________ diff --git a/Documentation/translations/zh_CN/scheduler/sched-arch.rst b/Documentation/translations/zh_CN/scheduler/sched-arch.rst index ce3f39d9b3..b2ac3c743a 100644 --- a/Documentation/translations/zh_CN/scheduler/sched-arch.rst +++ b/Documentation/translations/zh_CN/scheduler/sched-arch.rst @@ -20,8 +20,7 @@ ========== 1. 
运行队列锁 默认情况下,switch_to arch函数在调用时锁定了运行队列。这通常不是一个问题,除非 -switch_to可能需要获取运行队列锁。这通常是由于上下文切换中的唤醒操作造成的。见 -arch/ia64/include/asm/switch_to.h的例子。 +switch_to可能需要获取运行队列锁。这通常是由于上下文切换中的唤醒操作造成的。 为了要求调度器在运行队列解锁的情况下调用switch_to,你必须在头文件 中`#define __ARCH_WANT_UNLOCKED_CTXSW`(通常是定义switch_to的那个文件)。 @@ -68,7 +67,5 @@ arch/x86/kernel/process.c有轮询和睡眠空闲函数的例子。 我发现的可能的arch问题(并试图解决或没有解决)。: -ia64 - safe_halt的调用与中断相比,是否很荒谬? (它睡眠了吗) (参考 #4a) - sparc - 在这一点上,IRQ是开着的(?),把local_irq_save改为_disable。 - 待办事项: 需要第二个CPU来禁用抢占 (参考 #1) diff --git a/Documentation/translations/zh_CN/subsystem-apis.rst b/Documentation/translations/zh_CN/subsystem-apis.rst new file mode 100644 index 0000000000..47780bb077 --- /dev/null +++ b/Documentation/translations/zh_CN/subsystem-apis.rst @@ -0,0 +1,110 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ./disclaimer-zh_CN.rst + +:Original: Documentation/subsystem-apis.rst + +:翻译: + + 唐艺舟 Tang Yizhou + +============== +内核子系统文档 +============== + +这些书籍从内核开发者的角度,详细介绍了特定内核子系统 +的如何工作。这里的大部分信息直接取自内核源代码,并 +根据需要添加了补充材料(或者至少是我们设法添加的 - 可 +能 *不是* 所有的材料都有需要)。 + +核心子系统 +---------- + +.. toctree:: + :maxdepth: 1 + + core-api/index + driver-api/index + mm/index + power/index + scheduler/index + locking/index + +TODOList: + +* timers/index + +人机接口 +-------- + +.. toctree:: + :maxdepth: 1 + + sound/index + +TODOList: + +* input/index +* hid/index +* gpu/index +* fb/index + +网络接口 +-------- + +.. toctree:: + :maxdepth: 1 + + infiniband/index + +TODOList: + +* networking/index +* netlabel/index +* isdn/index +* mhi/index + +存储接口 +-------- + +.. toctree:: + :maxdepth: 1 + + filesystems/index + +TODOList: + +* block/index +* cdrom/index +* scsi/index +* target/index + +**Fixme**: 这里还需要更多的分类组织工作。 + +.. 
toctree:: + :maxdepth: 1 + + accounting/index + cpu-freq/index + iio/index + virt/index + PCI/index + peci/index + +TODOList: + +* fpga/index +* i2c/index +* leds/index +* pcmcia/index +* spi/index +* w1/index +* watchdog/index +* hwmon/index +* accel/index +* security/index +* crypto/index +* bpf/index +* usb/index +* misc-devices/index +* wmi/index diff --git a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt index a88fcbc11e..9cc97ec75d 100644 --- a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt +++ b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt @@ -768,18 +768,6 @@ const char *video_device_node_name(struct video_device *vdev); 此功能,而非访问 video_device::num 和 video_device::minor 域。 -视频缓冲辅助函数 ---------------- - -v4l2 核心 API 提供了一个处理视频缓冲的标准方法(称为“videobuf”)。 -这些方法使驱动可以通过统一的方式实现 read()、mmap() 和 overlay()。 -目前在设备上支持视频缓冲的方法有分散/聚集 DMA(videobuf-dma-sg)、 -线性 DMA(videobuf-dma-contig)以及大多用于 USB 设备的用 vmalloc -分配的缓冲(videobuf-vmalloc)。 - -请参阅 Documentation/driver-api/media/v4l2-videobuf.rst,以获得更多关于 videobuf -层的使用信息。 - v4l2_fh 结构体 ------------- diff --git a/Documentation/translations/zh_TW/admin-guide/README.rst b/Documentation/translations/zh_TW/admin-guide/README.rst index 7fc56e1e33..4cb581f599 100644 --- a/Documentation/translations/zh_TW/admin-guide/README.rst +++ b/Documentation/translations/zh_TW/admin-guide/README.rst @@ -9,16 +9,16 @@ 吳想成 Wu XiangCheng 胡皓文 Hu Haowen -Linux內核5.x版本 +Linux內核6.x版本 ========================================= -以下是Linux版本5的發行註記。仔細閱讀它們, +以下是Linux版本6的發行註記。仔細閱讀它們, 它們會告訴你這些都是什麼,解釋如何安裝內核,以及遇到問題時該如何做。 什麼是Linux? --------------- - Linux是Unix作業系統的克隆版本,由Linus Torvalds在一個鬆散的網絡黑客 + Linux是Unix操作系統的克隆版本,由Linus Torvalds在一個鬆散的網絡黑客 (Hacker,無貶義)團隊的幫助下從頭開始編寫。它旨在實現兼容POSIX和 單一UNIX規範。 @@ -28,7 +28,7 @@ Linux內核5.x版本 Linux在GNU通用公共許可證,版本2(GNU GPLv2)下分發,詳見隨附的COPYING文件。 -它能在什麼樣的硬體上運行? +它能在什麼樣的硬件上運行? 
----------------------------- 雖然Linux最初是爲32位的x86 PC機(386或更高版本)開發的,但今天它也能運行在 @@ -40,16 +40,16 @@ Linux內核5.x版本 單元(PMMU)和一個移植的GNU C編譯器(gcc;GNU Compiler Collection,GCC的一 部分)。Linux也被移植到許多沒有PMMU的體系架構中,儘管功能顯然受到了一定的 限制。 - Linux也被移植到了其自己上。現在可以將內核作爲用戶空間應用程式運行——這被 + Linux也被移植到了其自己上。現在可以將內核作爲用戶空間應用程序運行——這被 稱爲用戶模式Linux(UML)。 文檔 ----- -網際網路上和書籍上都有大量的電子文檔,既有Linux專屬文檔,也有與一般UNIX問題相關 +因特網上和書籍上都有大量的電子文檔,既有Linux專屬文檔,也有與一般UNIX問題相關 的文檔。我建議在任何Linux FTP站點上查找LDP(Linux文檔項目)書籍的文檔子目錄。 本自述文件並不是關於系統的文檔:有更好的可用資源。 - - 網際網路上和書籍上都有大量的(電子)文檔,既有Linux專屬文檔,也有與普通 + - 因特網上和書籍上都有大量的(電子)文檔,既有Linux專屬文檔,也有與普通 UNIX問題相關的文檔。我建議在任何有LDP(Linux文檔項目)書籍的Linux FTP 站點上查找文檔子目錄。本自述文件並不是關於系統的文檔:有更好的可用資源。 @@ -58,33 +58,33 @@ Linux內核5.x版本 :ref:`Documentation/process/changes.rst ` 文件,它包含了升級內核 可能會導致的問題的相關信息。 -安裝內核原始碼 +安裝內核源代碼 --------------- - - 如果您要安裝完整的原始碼,請把內核tar檔案包放在您有權限的目錄中(例如您 + - 如果您要安裝完整的源代碼,請把內核tar檔案包放在您有權限的目錄中(例如您 的主目錄)並將其解包:: - xz -cd linux-5.x.tar.xz | tar xvf - + xz -cd linux-6.x.tar.xz | tar xvf - - 將「X」替換成最新內核的版本號。 + 將“X”替換成最新內核的版本號。 - 【不要】使用 /usr/src/linux 目錄!這裡有一組庫頭文件使用的內核頭文件 + 【不要】使用 /usr/src/linux 目錄!這裏有一組庫頭文件使用的內核頭文件 (通常是不完整的)。它們應該與庫匹配,而不是被內核的變化搞得一團糟。 - - 您還可以通過打補丁在5.x版本之間升級。補丁以xz格式分發。要通過打補丁進行 - 安裝,請獲取所有較新的補丁文件,進入內核原始碼(linux-5.x)的目錄並 + - 您還可以通過打補丁在6.x版本之間升級。補丁以xz格式分發。要通過打補丁進行 + 安裝,請獲取所有較新的補丁文件,進入內核源代碼(linux-6.x)的目錄並 執行:: - xz -cd ../patch-5.x.xz | patch -p1 + xz -cd ../patch-6.x.xz | patch -p1 - 請【按順序】替換所有大於當前原始碼樹版本的「x」,這樣就可以了。您可能想要 + 請【按順序】替換所有大於當前源代碼樹版本的“x”,這樣就可以了。您可能想要 刪除備份文件(文件名類似xxx~ 或 xxx.orig),並確保沒有失敗的補丁(文件名 類似xxx# 或 xxx.rej)。如果有,不是你就是我犯了錯誤。 - 與5.x內核的補丁不同,5.x.y內核(也稱爲穩定版內核)的補丁不是增量的,而是 - 直接應用於基本的5.x內核。例如,如果您的基本內核是5.0,並且希望應用5.0.3 - 補丁,則不應先應用5.0.1和5.0.2的補丁。類似地,如果您運行的是5.0.2內核, - 並且希望跳轉到5.0.3,那麼在應用5.0.3補丁之前,必須首先撤銷5.0.2補丁 + 與6.x內核的補丁不同,6.x.y內核(也稱爲穩定版內核)的補丁不是增量的,而是 + 直接應用於基本的6.x內核。例如,如果您的基本內核是6.0,並且希望應用6.0.3 + 補丁,則不應先應用6.0.1和6.0.2的補丁。類似地,如果您運行的是6.0.2內核, + 並且希望跳轉到6.0.3,那麼在應用6.0.3補丁之前,必須首先撤銷6.0.2補丁 (即patch -R)。更多關於這方面的內容,請閱讀 :ref:`Documentation/process/applying-patches.rst ` 。 @@ -93,7 +93,7 @@ Linux內核5.x版本 
linux/scripts/patch-kernel linux - 上面命令中的第一個參數是內核原始碼的位置。補丁是在當前目錄應用的,但是 + 上面命令中的第一個參數是內核源代碼的位置。補丁是在當前目錄應用的,但是 可以將另一個目錄指定爲第二個參數。 - 確保沒有過時的 .o 文件和依賴項:: @@ -101,30 +101,30 @@ Linux內核5.x版本 cd linux make mrproper - 現在您應該已經正確安裝了原始碼。 + 現在您應該已經正確安裝了源代碼。 -軟體要求 +軟件要求 --------- - 編譯和運行5.x內核需要各種軟體包的最新版本。請參考 + 編譯和運行6.x內核需要各種軟件包的最新版本。請參考 :ref:`Documentation/process/changes.rst ` - 來了解最低版本要求以及如何升級軟體包。請注意,使用過舊版本的這些包可能會 + 來了解最低版本要求以及如何升級軟件包。請注意,使用過舊版本的這些包可能會 導致很難追蹤的間接錯誤,因此不要以爲在生成或操作過程中出現明顯問題時可以 只更新包。 爲內核建立目錄 --------------- - 編譯內核時,默認情況下所有輸出文件都將與內核原始碼放在一起。使用 + 編譯內核時,默認情況下所有輸出文件都將與內核源代碼放在一起。使用 ``make O=output/dir`` 選項可以爲輸出文件(包括 .config)指定備用位置。 例如:: - kernel source code: /usr/src/linux-5.x + kernel source code: /usr/src/linux-6.x build directory: /home/name/build/kernel 要配置和構建內核,請使用:: - cd /usr/src/linux-5.x + cd /usr/src/linux-6.x make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install @@ -136,7 +136,7 @@ Linux內核5.x版本 即使只升級一個小版本,也不要跳過此步驟。每個版本中都會添加新的配置選項, 如果配置文件沒有按預定設置,就會出現奇怪的問題。如果您想以最少的工作量 - 將現有配置升級到新版本,請使用 ``makeoldconfig`` ,它只會詢問您新配置 + 將現有配置升級到新版本,請使用 ``make oldconfig`` ,它只會詢問您新配置 選項的答案。 - 其他配置命令包括:: @@ -164,17 +164,17 @@ Linux內核5.x版本 "make ${PLATFORM}_defconfig" 使用arch/$arch/configs/${PLATFORM}_defconfig中 的默認選項值創建一個./.config文件。 - 用「makehelp」來獲取您體系架構中所有可用平台的列表。 + 用“make help”來獲取您體系架構中所有可用平臺的列表。 "make allyesconfig" - 通過儘可能將選項值設置爲「y」,創建一個 + 通過儘可能將選項值設置爲“y”,創建一個 ./.config文件。 "make allmodconfig" - 通過儘可能將選項值設置爲「m」,創建一個 + 通過儘可能將選項值設置爲“m”,創建一個 ./.config文件。 - "make allnoconfig" 通過儘可能將選項值設置爲「n」,創建一個 + "make allnoconfig" 通過儘可能將選項值設置爲“n”,創建一個 ./.config文件。 "make randconfig" 通過隨機設置選項值來創建./.config文件。 @@ -182,7 +182,7 @@ Linux內核5.x版本 "make localmodconfig" 基於當前配置和加載的模塊(lsmod)創建配置。禁用 已加載的模塊不需要的任何模塊選項。 - 要爲另一台計算機創建localmodconfig,請將該計算機 + 要爲另一臺計算機創建localmodconfig,請將該計算機 的lsmod存儲到一個文件中,並將其作爲lsmod參數傳入。 此外,通過在參數LMC_KEEP中指定模塊的路徑,可以將 @@ -200,9 +200,10 @@ Linux內核5.x版本 "make localyesconfig" 與localmodconfig類似,只是它會將所有模塊選項轉換 
爲內置(=y)。你可以同時通過LMC_KEEP保留模塊。 - "make kvmconfig" 爲kvm客體內核支持啓用其他選項。 + "make kvm_guest.config" + 爲kvm客戶機內核支持啓用其他選項。 - "make xenconfig" 爲xen dom0客體內核支持啓用其他選項。 + "make xen.config" 爲xen dom0客戶機內核支持啓用其他選項。 "make tinyconfig" 配置儘可能小的內核。 @@ -218,10 +219,10 @@ Linux內核5.x版本 這種情況下,數學仿真永遠不會被使用。內核會稍微大一點,但不管 是否有數學協處理器,都可以在不同的機器上工作。 - - 「kernel hacking」配置細節通常會導致更大或更慢的內核(或兩者 + - “kernel hacking”配置細節通常會導致更大或更慢的內核(或兩者 兼而有之),甚至可以通過配置一些例程來主動嘗試破壞壞代碼以發現 內核問題,從而降低內核的穩定性(kmalloc())。因此,您可能應該 - 用於研究「開發」、「實驗」或「調試」特性相關問題。 + 用於研究“開發”、“實驗”或“調試”特性相關問題。 編譯內核 --------- @@ -229,10 +230,8 @@ Linux內核5.x版本 - 確保您至少有gcc 5.1可用。 有關更多信息,請參閱 :ref:`Documentation/process/changes.rst ` 。 - 請注意,您仍然可以使用此內核運行a.out用戶程序。 - - 執行 ``make`` 來創建壓縮內核映像。如果您安裝了lilo以適配內核makefile, - 那麼也可以進行 ``makeinstall`` ,但是您可能需要先檢查特定的lilo設置。 + 那麼也可以進行 ``make install`` ,但是您可能需要先檢查特定的lilo設置。 實際安裝必須以root身份執行,但任何正常構建都不需要。 無須徒然使用root身份。 @@ -242,8 +241,8 @@ Linux內核5.x版本 - 詳細的內核編譯/生成輸出: 通常,內核構建系統在相當安靜的模式下運行(但不是完全安靜)。但是有時您或 - 其他內核開發人員需要看到編譯、連結或其他命令的執行過程。爲此,可使用 - 「verbose(詳細)」構建模式。 + 其他內核開發人員需要看到編譯、鏈接或其他命令的執行過程。爲此,可使用 + “verbose(詳細)”構建模式。 向 ``make`` 命令傳遞 ``V=1`` 來實現,例如:: make V=1 all @@ -255,15 +254,15 @@ Linux內核5.x版本 與工作內核版本號相同的新內核,請在進行 ``make modules_install`` 安裝 之前備份modules目錄。 - 或者,在編譯之前,使用內核配置選項「LOCALVERSION」向常規內核版本附加 - 一個唯一的後綴。LOCALVERSION可以在「General Setup」菜單中設置。 + 或者,在編譯之前,使用內核配置選項“LOCALVERSION”向常規內核版本附加 + 一個唯一的後綴。LOCALVERSION可以在“General Setup”菜單中設置。 - 爲了引導新內核,您需要將內核映像(例如編譯後的 .../linux/arch/x86/boot/bzImage)複製到常規可引導內核的位置。 - 不再支持在沒有LILO等啓動裝載程序幫助的情況下直接從軟盤引導內核。 - 如果從硬碟引導Linux,很可能使用LILO,它使用/etc/lilo.conf文件中 + 如果從硬盤引導Linux,很可能使用LILO,它使用/etc/lilo.conf文件中 指定的內核映像文件。內核映像文件通常是/vmlinuz、/boot/vmlinuz、 /bzImage或/boot/bzImage。使用新內核前,請保存舊映像的副本,並複製 新映像覆蓋舊映像。然後您【必須重新運行LILO】來更新加載映射!否則, @@ -284,68 +283,13 @@ Linux內核5.x版本 若遇到問題 ----------- - - 如果您發現了一些可能由於內核缺陷所導致的問題,請檢查MAINTAINERS(維護者) - 文件看看是否有人與令您遇到麻煩的內核部分相關。如果無人在此列出,那麼第二 - 個最好的方案就是把它們發給我(torvalds@linux-foundation.org),也可能發送 - 到任何其他相關的郵件列表或新聞組。 - - - 在所有的缺陷報告中,【請】告訴我們您在說什麼內核,如何復現問題,以及您的 - 
設置是什麼的(使用您的常識)。如果問題是新的,請告訴我;如果問題是舊的, - 請嘗試告訴我您什麼時候首次注意到它。 - - - 如果缺陷導致如下消息:: - - unable to handle kernel paging request at address C0000010 - Oops: 0002 - EIP: 0010:XXXXXXXX - eax: xxxxxxxx ebx: xxxxxxxx ecx: xxxxxxxx edx: xxxxxxxx - esi: xxxxxxxx edi: xxxxxxxx ebp: xxxxxxxx - ds: xxxx es: xxxx fs: xxxx gs: xxxx - Pid: xx, process nr: xx - xx xx xx xx xx xx xx xx xx xx - - 或者類似的內核調試信息顯示在屏幕上或在系統日誌里,請【如實】複製它。 - 可能對你來說轉儲(dump)看起來不可理解,但它確實包含可能有助於調試問題的 - 信息。轉儲上方的文本也很重要:它說明了內核轉儲代碼的原因(在上面的示例中, - 是由於內核指針錯誤)。更多關於如何理解轉儲的信息,請參見 - Documentation/admin-guide/bug-hunting.rst。 - - - 如果使用 CONFIG_KALLSYMS 編譯內核,則可以按原樣發送轉儲,否則必須使用 - ``ksymoops`` 程序來理解轉儲(但通常首選使用CONFIG_KALLSYMS編譯)。 - 此實用程序可從 - https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ 下載。 - 或者,您可以手動執行轉儲查找: - - - 在調試像上面這樣的轉儲時,如果您可以查找EIP值的含義,這將非常有幫助。 - 十六進位值本身對我或其他任何人都沒有太大幫助:它會取決於特定的內核設置。 - 您應該做的是從EIP行獲取十六進位值(忽略 ``0010:`` ),然後在內核名字列表 - 中查找它,以查看哪個內核函數包含有問題的地址。 - - 要找到內核函數名,您需要找到與顯示症狀的內核相關聯的系統二進位文件。就是 - 文件「linux/vmlinux」。要提取名字列表並將其與內核崩潰中的EIP進行匹配, - 請執行:: - - nm vmlinux | sort | less - - 這將爲您提供一個按升序排序的內核地址列表,從中很容易找到包含有問題的地址 - 的函數。請注意,內核調試消息提供的地址不一定與函數地址完全匹配(事實上, - 這是不可能的),因此您不能只「grep」列表:不過列表將爲您提供每個內核函數 - 的起點,因此通過查找起始地址低於你正在搜索的地址,但後一個函數的高於的 - 函數,你會找到您想要的。實際上,在您的問題報告中加入一些「上下文」可能是 - 一個好主意,給出相關的上下幾行。 - - 如果您由於某些原因無法完成上述操作(如您使用預編譯的內核映像或類似的映像), - 請儘可能多地告訴我您的相關設置信息,這會有所幫助。有關詳細信息請閱讀 - 『Documentation/admin-guide/reporting-issues.rst』。 - - - 或者,您可以在正在運行的內核上使用gdb(只讀的;即不能更改值或設置斷點)。 - 爲此,請首先使用-g編譯內核;適當地編輯arch/x86/Makefile,然後執行 ``make - clean`` 。您還需要啓用CONFIG_PROC_FS(通過 ``make config`` )。 - - 使用新內核重新啓動後,執行 ``gdb vmlinux /proc/kcore`` 。現在可以使用所有 - 普通的gdb命令。查找系統崩潰點的命令是 ``l *0xXXXXXXXX`` (將xxx替換爲EIP - 值)。 - - 用gdb無法調試一個當前未運行的內核是由於gdb(錯誤地)忽略了編譯內核的起始 - 偏移量。 +如果您發現了一些可能由於內核缺陷所導致的問題,請參閱: +Documentation/translations/zh_CN/admin-guide/reporting-issues.rst 。 + +想要理解內核錯誤報告,請參閱: +Documentation/translations/zh_CN/admin-guide/bug-hunting.rst 。 + +更多用GDB調試內核的信息,請參閱: +Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst +和 
Documentation/dev-tools/kgdb.rst 。 diff --git a/Documentation/translations/zh_TW/admin-guide/bootconfig.rst b/Documentation/translations/zh_TW/admin-guide/bootconfig.rst new file mode 100644 index 0000000000..abac5aa60f --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/bootconfig.rst @@ -0,0 +1,294 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/bootconfig.rst + +:譯者: 吳想成 Wu XiangCheng + +======== +引導配置 +======== + +:作者: Masami Hiramatsu + +概述 +==== + +引導配置擴展了現有的內核命令行,以一種更有效率的方式在引導內核時進一步支持 +鍵值數據。這允許管理員傳遞一份結構化關鍵字的配置文件。 + +配置文件語法 +============ + +引導配置文件的語法採用非常簡單的鍵值結構。每個關鍵字由點連接的單詞組成,鍵 +和值由 ``=`` 連接。值以分號( ``;`` )或換行符( ``\n`` )結尾。數組值中每 +個元素由逗號( ``,`` )分隔。:: + + KEY[.WORD[...]] = VALUE[, VALUE2[...]][;] + +與內核命令行語法不同,逗號和 ``=`` 周圍允許有空格。 + +關鍵字只允許包含字母、數字、連字符( ``-`` )和下劃線( ``_`` )。值可包含 +可打印字符和空格,但分號( ``;`` )、換行符( ``\n`` )、逗號( ``,`` )、 +井號( ``#`` )和右大括號( ``}`` )等分隔符除外。 + +如果你需要在值中使用這些分隔符,可以用雙引號( ``"VALUE"`` )或單引號 +( ``'VALUE'`` )括起來。注意,引號無法轉義。 + +鍵的值可以爲空或不存在。這些鍵用於檢查該鍵是否存在(類似布爾值)。 + +鍵值語法 +-------- + +引導配置文件語法允許用戶通過大括號合併鍵名部分相同的關鍵字。例如:: + + foo.bar.baz = value1 + foo.bar.qux.quux = value2 + +也可以寫成:: + + foo.bar { + baz = value1 + qux.quux = value2 + } + +或者更緊湊一些,寫成:: + + foo.bar { baz = value1; qux.quux = value2 } + +在這兩種樣式中,引導解析時相同的關鍵字都會自動合併。因此可以追加類似的樹或 +鍵值。 + +相同關鍵字的值 +-------------- + +禁止兩個或多個值或數組共享同一個關鍵字。例如:: + + foo = bar, baz + foo = qux # !錯誤! 
我們不可以重定義相同的關鍵字 + +如果你想要更新值,必須顯式使用覆蓋操作符 ``:=`` 。例如:: + + foo = bar, baz + foo := qux + +這樣 ``foo`` 關鍵字的值就變成了 ``qux`` 。這對於通過添加(部分)自定義引導 +配置來覆蓋默認值非常有用,免於解析默認引導配置。 + +如果你想對現有關鍵字追加值作爲數組成員,可以使用 ``+=`` 操作符。例如:: + + foo = bar, baz + foo += qux + +這樣, ``foo`` 關鍵字就同時擁有了 ``bar`` , ``baz`` 和 ``qux`` 。 + +此外,父關鍵字下可同時存在值和子關鍵字。 +例如,下列配置是可行的。:: + + foo = value1 + foo.bar = value2 + foo := value3 # 這會更新foo的值。 + +注意,裸值不能直接放進結構化關鍵字中,必須在大括號外定義它。例如:: + + foo { + bar = value1 + bar { + baz = value2 + qux = value3 + } + } + +同時,關鍵字下值節點的順序是固定的。如果值和子關鍵字同時存在,值永遠是該關 +鍵字的第一個子節點。因此如果用戶先指定子關鍵字,如:: + + foo.bar = value1 + foo = value2 + +則在程序(和/proc/bootconfig)中,它會按如下顯示:: + + foo = value2 + foo.bar = value1 + +註釋 +---- + +配置語法接受shell腳本風格的註釋。註釋以井號( ``#`` )開始,到換行符 +( ``\n`` )結束。 + +:: + + # comment line + foo = value # value is set to foo. + bar = 1, # 1st element + 2, # 2nd element + 3 # 3rd element + +會被解析爲:: + + foo = value + bar = 1, 2, 3 + +注意你不能把註釋放在值和分隔符( ``,`` 或 ``;`` )之間。如下配置語法是錯誤的:: + + key = 1 # comment + ,2 + + +/proc/bootconfig +================ + +/proc/bootconfig是引導配置的用戶空間接口。與/proc/cmdline不同,此文件內容以 +鍵值列表樣式顯示。 +每個鍵值對一行,樣式如下:: + + KEY[.WORDS...] = "[VALUE]"[,"VALUE2"...] 
+ + +用引導配置引導內核 +================== + +用引導配置引導內核有兩種方法:將引導配置附加到initrd鏡像或直接嵌入內核中。 + +*initrd: initial RAM disk,初始內存磁盤* + +將引導配置附加到initrd +---------------------- + +由於默認情況下引導配置文件是用initrd加載的,因此它將被添加到initrd(initramfs) +鏡像文件的末尾,其中包含填充、大小、校驗值和12字節幻數,如下所示:: + + [initrd][bootconfig][padding][size(le32)][checksum(le32)][#BOOTCONFIG\n] + +大小和校驗值爲小端序存放的32位無符號值。 + +當引導配置被加到initrd鏡像時,整個文件大小會對齊到4字節。空字符( ``\0`` ) +會填補對齊空隙。因此 ``size`` 就是引導配置文件的長度+填充的字節。 + +Linux內核在內存中解碼initrd鏡像的最後部分以獲取引導配置數據。由於這種“揹負式” +的方法,只要引導加載器傳遞了正確的initrd文件大小,就無需更改或更新引導加載器 +和內核鏡像本身。如果引導加載器意外傳遞了更長的大小,內核將無法找到引導配置數 +據。 + +Linux內核在tools/bootconfig下提供了 ``bootconfig`` 命令來完成此操作,管理員 +可以用它從initrd鏡像中刪除或追加配置文件。你可以用以下命令來構建它:: + + # make -C tools/bootconfig + +要向initrd鏡像添加你的引導配置文件,請按如下命令操作(舊數據會自動移除):: + + # tools/bootconfig/bootconfig -a your-config /boot/initrd.img-X.Y.Z + +要從鏡像中移除配置,可以使用-d選項:: + + # tools/bootconfig/bootconfig -d /boot/initrd.img-X.Y.Z + +然後在內核命令行上添加 ``bootconfig`` 告訴內核去initrd文件末尾尋找內核配置。 + +將引導配置嵌入內核 +------------------ + +如果你不能使用initrd,也可以通過Kconfig選項將引導配置文件嵌入內核中。在此情 +況下,你需要用以下選項重新編譯內核:: + + CONFIG_BOOT_CONFIG_EMBED=y + CONFIG_BOOT_CONFIG_EMBED_FILE="/引導配置/文件/的/路徑" + +``CONFIG_BOOT_CONFIG_EMBED_FILE`` 需要從源碼樹或對象樹開始的引導配置文件的 +絕對/相對路徑。內核會將其嵌入作爲默認引導配置。 + +與將引導配置附加到initrd一樣,你也需要在內核命令行上添加 ``bootconfig`` 告訴 +內核去啓用內嵌的引導配置。 + +注意,即使你已經設置了此選項,仍可用附加到initrd的其他引導配置覆蓋內嵌的引導 +配置。 + +通過引導配置傳遞內核參數 +======================== + +除了內核命令行,引導配置也可以用於傳遞內核參數。所有 ``kernel`` 關鍵字下的鍵 +值對都將直接傳遞給內核命令行。此外, ``init`` 下的鍵值對將通過命令行傳遞給 +init進程。參數按以下順序與用戶給定的內核命令行字符串相連,因此命令行參數可以 +覆蓋引導配置參數(這取決於子系統如何處理參數,但通常前面的參數將被後面的參數 +覆蓋):: + + [bootconfig params][cmdline params] -- [bootconfig init params][cmdline init params] + +如果引導配置文件給出的kernel/init參數是:: + + kernel { + root = 01234567-89ab-cdef-0123-456789abcd + } + init { + splash + } + +這將被複制到內核命令行字符串中,如下所示:: + + root="01234567-89ab-cdef-0123-456789abcd" -- splash + +如果用戶給出的其他命令行是:: + + ro bootconfig -- quiet + +則最後的內核命令行如下:: + + root="01234567-89ab-cdef-0123-456789abcd" ro bootconfig -- splash quiet + + 
+配置文件的限制 +============== + +當前最大的配置大小是32KB,關鍵字總數(不是鍵值條目)必須少於1024個節點。 +注意:這不是條目數而是節點數,條目必須消耗超過2個節點(一個關鍵字和一個值)。 +所以從理論上講最多512個鍵值對。如果關鍵字平均包含3個單詞,則可有256個鍵值對。 +在大多數情況下,配置項的數量將少於100個條目,小於8KB,因此這應該足夠了。如果 +節點數超過1024,解析器將返回錯誤,即使文件大小小於32KB。(請注意,此最大尺寸 +不包括填充的空字符。) +無論如何,因爲 ``bootconfig`` 命令在附加啓動配置到initrd映像時會驗證它,用戶 +可以在引導之前注意到它。 + + +引導配置API +=========== + +用戶可以查詢或遍歷鍵值對,也可以查找(前綴)根關鍵字節點,並在查找該節點下的 +鍵值。 + +如果您有一個關鍵字字符串,則可以直接使用 xbc_find_value() 查詢該鍵的值。如果 +你想知道引導配置裏有哪些關鍵字,可以使用 xbc_for_each_key_value() 迭代鍵值對。 +請注意,您需要使用 xbc_array_for_each_value() 訪問數組的值,例如:: + + vnode = NULL; + xbc_find_value("key.word", &vnode); + if (vnode && xbc_node_is_array(vnode)) + xbc_array_for_each_value(vnode, value) { + printk("%s ", value); + } + +如果您想查找具有前綴字符串的鍵,可以使用 xbc_find_node() 通過前綴字符串查找 +節點,然後用 xbc_node_for_each_key_value() 迭代前綴節點下的鍵。 + +但最典型的用法是獲取前綴下的命名值或前綴下的命名數組,例如:: + + root = xbc_find_node("key.prefix"); + value = xbc_node_find_value(root, "option", &vnode); + ... + xbc_node_for_each_array_value(root, "array-option", value, anode) { + ... 
+ } + +這將訪問值“key.prefix.option”的值和“key.prefix.array-option”的數組。 + +鎖是不需要的,因爲在初始化之後配置只讀。如果需要修改,必須複製所有數據和關鍵字。 + + +函數與結構體 +============ + +相關定義的kernel-doc參見: + + - include/linux/bootconfig.h + - lib/bootconfig.c + diff --git a/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst b/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst index b448dbf5ac..3f10a9f8f2 100644 --- a/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst +++ b/Documentation/translations/zh_TW/admin-guide/bug-bisect.rst @@ -17,14 +17,14 @@ 引言 ===== -始終嘗試由來自kernel.org的原始碼構建的最新內核。如果您沒有信心這樣做,請將 +始終嘗試由來自kernel.org的源代碼構建的最新內核。如果您沒有信心這樣做,請將 錯誤報告給您的發行版供應商,而不是內核開發人員。 找到缺陷(bug)並不總是那麼容易,不過仍然得去找。如果你找不到它,不要放棄。 -儘可能多的向相關維護人員報告您發現的信息。請參閱MAINTAINERS文件以了解您所 +儘可能多的向相關維護人員報告您發現的信息。請參閱MAINTAINERS文件以瞭解您所 關注的子系統的維護人員。 -在提交錯誤報告之前,請閱讀「Documentation/admin-guide/reporting-issues.rst」。 +在提交錯誤報告之前,請閱讀“Documentation/admin-guide/reporting-issues.rst”。 設備未出現(Devices not appearing) ==================================== @@ -38,7 +38,7 @@ 操作步驟: -- 從git原始碼構建內核 +- 從git源代碼構建內核 - 以此開始二分 [#f1]_:: $ git bisect start @@ -76,7 +76,7 @@ 如需進一步參考,請閱讀: - ``git-bisect`` 的手冊頁 -- `Fighting regressions with git bisect(用git bisect解決回歸) +- `Fighting regressions with git bisect(用git bisect解決迴歸) `_ - `Fully automated bisecting with "git bisect run"(使用git bisect run 來全自動二分) `_ diff --git a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst index 9a3de3bff5..631fd26509 100644 --- a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst +++ b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst @@ -48,8 +48,8 @@ [] ? sysenter_past_esp+0x40/0x6a ---[ end trace 6ebc60ef3981792f ]--- -這樣的堆棧跟蹤提供了足夠的信息來識別內核原始碼中發生錯誤的那一行。根據問題的 -嚴重性,它還可能包含 **「Oops」** 一詞,比如:: +這樣的堆棧跟蹤提供了足夠的信息來識別內核源代碼中發生錯誤的那一行。根據問題的 +嚴重性,它還可能包含 **“Oops”** 一詞,比如:: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] iret_exc+0x7d0/0xa59 @@ -58,17 +58,17 @@ ... 
儘管有 **Oops** 或其他類型的堆棧跟蹤,但通常需要找到出問題的行來識別和處理缺 -陷。在本章中,我們將參考「Oops」來了解需要分析的各種堆棧跟蹤。 +陷。在本章中,我們將參考“Oops”來了解需要分析的各種堆棧跟蹤。 如果內核是用 ``CONFIG_DEBUG_INFO`` 編譯的,那麼可以使用文件: `scripts/decode_stacktrace.sh` 。 -連結的模塊 +鏈接的模塊 ----------- -受到汙染或正在加載/卸載的模塊用「(…)」標記,汙染標誌在 -`Documentation/admin-guide/tainted-kernels.rst` 文件中進行了描述,「正在被加 -載」用「+」標註,「正在被卸載」用「-」標註。 +受到污染或正在加載/卸載的模塊用“(…)”標記,污染標誌在 +`Documentation/admin-guide/tainted-kernels.rst` 文件中進行了描述,“正在被加 +載”用“+”標註,“正在被卸載”用“-”標註。 Oops消息在哪? @@ -81,19 +81,19 @@ syslog文件,通常是 ``/var/log/messages`` (取決於 ``/etc/syslog.conf`` 有時 ``klogd`` 會掛掉,這種情況下您可以運行 ``dmesg > file`` 從內核緩衝區 讀取數據並保存它。或者您可以 ``cat /proc/kmsg > file`` ,但是您必須適時 -中斷以停止傳輸,因爲 ``kmsg`` 是一個「永無止境的文件」。 +中斷以停止傳輸,因爲 ``kmsg`` 是一個“永無止境的文件”。 -如果機器嚴重崩潰,無法輸入命令或磁碟不可用,那還有三個選項: +如果機器嚴重崩潰,無法輸入命令或磁盤不可用,那還有三個選項: (1) 手動複製屏幕上的文本,並在機器重新啓動後輸入。很難受,但這是突然崩潰下 - 唯一的選擇。或者你可以用數位相機拍下屏幕——雖然不那麼好,但總比什麼都沒 - 有好。如果消息滾動超出控制台頂部,使用更高解析度(例如 ``vga=791`` ) - 引導啓動將允許您閱讀更多文本。(警告:這需要 ``vesafb`` ,因此對「早期」 + 唯一的選擇。或者你可以用數碼相機拍下屏幕——雖然不那麼好,但總比什麼都沒 + 有好。如果消息滾動超出控制檯頂部,使用更高分辨率(例如 ``vga=791`` ) + 引導啓動將允許您閱讀更多文本。(警告:這需要 ``vesafb`` ,因此對“早期” 的Oppses沒有幫助) (2) 從串口終端啓動(參見 :ref:`Documentation/admin-guide/serial-console.rst ` ), - 在另一台機器上運行數據機然後用你喜歡的通信程序捕獲輸出。 + 在另一臺機器上運行調制解調器然後用你喜歡的通信程序捕獲輸出。 Minicom運行良好。 (3) 使用Kdump(參閱 Documentation/admin-guide/kdump/kdump.rst ),使用 @@ -103,7 +103,7 @@ syslog文件,通常是 ``/var/log/messages`` (取決於 ``/etc/syslog.conf`` 找到缺陷位置 ------------- -如果你能指出缺陷在內核原始碼中的位置,則報告缺陷的效果會非常好。這有兩種方法。 +如果你能指出缺陷在內核源代碼中的位置,則報告缺陷的效果會非常好。這有兩種方法。 通常來說使用 ``gdb`` 會比較容易,不過內核需要用調試信息來預編譯。 gdb @@ -187,7 +187,7 @@ GNU 調試器(GNU debugger, ``gdb`` )是從 ``vmlinux`` 文件中找出OOP objdump ^^^^^^^^ -要調試內核,請使用objdump並從崩潰輸出中查找十六進位偏移,以找到有效的代碼/匯 +要調試內核,請使用objdump並從崩潰輸出中查找十六進制偏移,以找到有效的代碼/匯 編行。如果沒有調試符號,您將看到所示例程的彙編程序代碼,但是如果內核有調試 符號,C代碼也將可見(調試符號可以在內核配置菜單的hacking項中啓用)。例如:: @@ -197,7 +197,7 @@ objdump 您需要處於內核樹的頂層以便此獲得您的C文件。 -如果您無法訪問原始碼,仍然可以使用以下方法調試一些崩潰轉儲(如Dave Miller的 +如果您無法訪問源代碼,仍然可以使用以下方法調試一些崩潰轉儲(如Dave Miller的 示例崩潰轉儲輸出所示):: EIP is at +0x14/0x4c0 @@ -234,9 +234,9 @@ objdump 報告缺陷 
--------- -一旦你通過定位缺陷找到了其發生的地方,你可以嘗試自己修復它或者向上游報告它。 +一旦你通過定位缺陷找到了其發生的地方,你可以嘗試自己修復它或者向上遊報告它。 -爲了向上游報告,您應該找出用於開發受影響代碼的郵件列表。這可以使用 ``get_maintainer.pl`` 。 +爲了向上遊報告,您應該找出用於開發受影響代碼的郵件列表。這可以使用 ``get_maintainer.pl`` 。 例如,您在gspca的sonixj.c文件中發現一個缺陷,則可以通過以下方法找到它的維護者:: @@ -251,7 +251,7 @@ objdump 請注意它將指出: -- 最後接觸原始碼的開發人員(如果這是在git樹中完成的)。在上面的例子中是Tejun +- 最後接觸源代碼的開發人員(如果這是在git樹中完成的)。在上面的例子中是Tejun 和Bhaktipriya(在這個特定的案例中,沒有人真正參與這個文件的開發); - 驅動維護人員(Hans Verkuil); - 子系統維護人員(Mauro Carvalho Chehab); diff --git a/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst b/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst index bd0c08aab8..6961006b4a 100644 --- a/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst +++ b/Documentation/translations/zh_TW/admin-guide/clearing-warn-once.rst @@ -7,10 +7,10 @@ 清除 WARN_ONCE -------------- -WARN_ONCE / WARN_ON_ONCE / printk_once 僅僅列印一次消息. +WARN_ONCE / WARN_ON_ONCE / printk_once 僅僅打印一次消息. echo 1 > /sys/kernel/debug/clear_warn_once -可以清除這種狀態並且再次允許列印一次告警信息,這對於運行測試集後重現問題 +可以清除這種狀態並且再次允許打印一次告警信息,這對於運行測試集後重現問題 很有用。 diff --git a/Documentation/translations/zh_TW/admin-guide/cpu-load.rst b/Documentation/translations/zh_TW/admin-guide/cpu-load.rst index 9e04aeac1a..cc046f3b7f 100644 --- a/Documentation/translations/zh_TW/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_TW/admin-guide/cpu-load.rst @@ -20,13 +20,13 @@ Linux通過``/proc/stat``和``/proc/uptime``導出各種信息,用戶空間工 ... 
-這裡系統認爲在默認採樣周期內有10.01%的時間工作在用戶空間,2.92%的時 +這裏系統認爲在默認採樣週期內有10.01%的時間工作在用戶空間,2.92%的時 間用在系統空間,總體上有81.63%的時間是空閒的。 大多數情況下``/proc/stat``的信息幾乎真實反映了系統信息,然而,由於內 核採集這些數據的方式/時間的特點,有時這些信息根本不可靠。 -那麼這些信息是如何被搜集的呢?每當時間中斷觸發時,內核查看此刻運行的 +那麼這些信息是如何被蒐集的呢?每當時間中斷觸發時,內核查看此刻運行的 進程類型,並增加與此類型/狀態進程對應的計數器的值。這種方法的問題是 在兩次時間中斷之間系統(進程)能夠在多種狀態之間切換多次,而計數器只 增加最後一種狀態下的計數。 @@ -34,7 +34,7 @@ Linux通過``/proc/stat``和``/proc/uptime``導出各種信息,用戶空間工 舉例 --- -假設系統有一個進程以如下方式周期性地占用cpu:: +假設系統有一個進程以如下方式週期性地佔用cpu:: 兩個時鐘中斷之間的時間線 |-----------------------| @@ -46,7 +46,7 @@ Linux通過``/proc/stat``和``/proc/uptime``導出各種信息,用戶空間工 在上面的情況下,根據``/proc/stat``的信息(由於當系統處於空閒狀態時, 時間中斷經常會發生)系統的負載將會是0 -大家能夠想像內核的這種行爲會發生在許多情況下,這將導致``/proc/stat`` +大家能夠想象內核的這種行爲會發生在許多情況下,這將導致``/proc/stat`` 中存在相當古怪的信息:: /* gcc -o hog smallhog.c */ diff --git a/Documentation/translations/zh_TW/admin-guide/cputopology.rst b/Documentation/translations/zh_TW/admin-guide/cputopology.rst new file mode 100644 index 0000000000..5c46d1b3b0 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/cputopology.rst @@ -0,0 +1,97 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/cputopology.rst + +:翻譯: + + 唐藝舟 Tang Yizhou + +========================== +如何通過sysfs將CPU拓撲導出 +========================== + +CPU拓撲信息通過sysfs導出。顯示的項(屬性)和某些架構的/proc/cpuinfo輸出相似。它們位於 +/sys/devices/system/cpu/cpuX/topology/。請閱讀ABI文件: +Documentation/ABI/stable/sysfs-devices-system-cpu。 + +drivers/base/topology.c是體系結構中性的,它導出了這些屬性。然而,die、cluster、book、 +draw這些層次結構相關的文件僅在體系結構提供了下文描述的宏的條件下被創建。 + +對於支持這個特性的體系結構,它必須在include/asm-XXX/topology.h中定義這些宏中的一部分:: + + #define topology_physical_package_id(cpu) + #define topology_die_id(cpu) + #define topology_cluster_id(cpu) + #define topology_core_id(cpu) + #define topology_book_id(cpu) + #define topology_drawer_id(cpu) + #define topology_sibling_cpumask(cpu) + #define topology_core_cpumask(cpu) + #define topology_cluster_cpumask(cpu) + #define topology_die_cpumask(cpu) + #define topology_book_cpumask(cpu) + #define topology_drawer_cpumask(cpu) + +``**_id macros`` 的類型是int。 +``**_cpumask macros`` 的類型是 ``(const) struct cpumask *`` 。後者和恰當的 +``**_siblings`` sysfs屬性對應(除了topology_sibling_cpumask(),它和thread_siblings +對應)。 + +爲了在所有體系結構上保持一致,include/linux/topology.h提供了上述所有宏的默認定義,以防 +它們未在include/asm-XXX/topology.h中定義: + +1) topology_physical_package_id: -1 +2) topology_die_id: -1 +3) topology_cluster_id: -1 +4) topology_core_id: 0 +5) topology_book_id: -1 +6) topology_drawer_id: -1 +7) topology_sibling_cpumask: 僅入參CPU +8) topology_core_cpumask: 僅入參CPU +9) topology_cluster_cpumask: 僅入參CPU +10) topology_die_cpumask: 僅入參CPU +11) topology_book_cpumask: 僅入參CPU +12) topology_drawer_cpumask: 僅入參CPU + +此外,CPU拓撲信息由/sys/devices/system/cpu提供,包含下述文件。輸出對應的內部數據源放在 +方括號("[]")中。 + + =========== ================================================================== + kernel_max: 內核配置允許的最大CPU下標值。[NR_CPUS-1] + + offline: 由於熱插拔移除或者超過內核允許的CPU上限(上文描述的kernel_max) + 導致未上線的CPU。[~cpu_online_mask + cpus >= NR_CPUS] + + online: 在線的CPU,可供調度使用。[cpu_online_mask] + + possible: 已被分配資源的CPU,如果它們CPU實際存在,可以上線。 + 
[cpu_possible_mask] + + present: 被系統識別實際存在的CPU。[cpu_present_mask] + =========== ================================================================== + +上述輸出的格式和cpulist_parse()兼容[參見 ]。下面給些例子。 + +在本例中,系統中有64個CPU,但是CPU 32-63超過了kernel_max值,因爲NR_CPUS配置項是32, +取值範圍被限制爲0..31。此外注意CPU2和4-31未上線,但是可以上線,因爲它們同時存在於 +present和possible:: + + kernel_max: 31 + offline: 2,4-31,32-63 + online: 0-1,3 + possible: 0-31 + present: 0-31 + +在本例中,NR_CPUS配置項是128,但內核啓動時設置possible_cpus=144。系統中有4個CPU, +CPU2被手動設置下線(也是唯一一個可以上線的CPU):: + + kernel_max: 127 + offline: 2,4-127,128-143 + online: 0-1,3 + possible: 0-127 + present: 0-3 + +閱讀Documentation/core-api/cpu_hotplug.rst可瞭解開機參數possible_cpus=NUM,同時還 +可以瞭解各種cpumask的信息。 + diff --git a/Documentation/translations/zh_TW/admin-guide/index.rst b/Documentation/translations/zh_TW/admin-guide/index.rst index 2804d61920..aba8939351 100644 --- a/Documentation/translations/zh_TW/admin-guide/index.rst +++ b/Documentation/translations/zh_TW/admin-guide/index.rst @@ -3,13 +3,14 @@ .. include:: ../disclaimer-zh_TW.rst :Original: :doc:`../../../admin-guide/index` -:Translator: 胡皓文 Hu Haowen +:Translator: Alex Shi + 胡皓文 Hu Haowen Linux 內核用戶和管理員指南 ========================== 下面是一組隨時間添加到內核中的面向用戶的文檔的集合。到目前爲止,還沒有一個 -整體的順序或組織 - 這些材料不是一個單一的,連貫的文件!幸運的話,情況會隨著 +整體的順序或組織 - 這些材料不是一個單一的,連貫的文件!幸運的話,情況會隨着 時間的推移而迅速改善。 這個初始部分包含總體信息,包括描述內核的README, 關於內核參數的文檔等。 @@ -21,15 +22,15 @@ Linux 內核用戶和管理員指南 Todolist: - kernel-parameters - devices - sysctl/index +* kernel-parameters +* devices +* sysctl/index 本節介紹CPU漏洞及其緩解措施。 Todolist: - hw-vuln/index +* hw-vuln/index 下面的一組文檔,針對的是試圖跟蹤問題和bug的用戶。 @@ -37,6 +38,7 @@ Todolist: :maxdepth: 1 reporting-issues + reporting-regressions security-bugs bug-hunting bug-bisect @@ -45,18 +47,17 @@ Todolist: Todolist: - reporting-bugs - ramoops - dynamic-debug-howto - kdump/index - perf/index +* ramoops +* dynamic-debug-howto +* kdump/index +* perf/index -這是應用程式開發人員感興趣的章節的開始。可以在這裡找到涵蓋內核ABI各個 +這是應用程序開發人員感興趣的章節的開始。可以在這裏找到涵蓋內核ABI各個 方面的文檔。 Todolist: - sysfs-rules +* 
sysfs-rules 本手冊的其餘部分包括各種指南,介紹如何根據您的喜好配置內核的特定行爲。 @@ -64,67 +65,67 @@ Todolist: .. toctree:: :maxdepth: 1 + bootconfig clearing-warn-once cpu-load + cputopology + lockup-watchdogs unicode + sysrq + mm/index Todolist: - acpi/index - aoe/index - auxdisplay/index - bcache - binderfs - binfmt-misc - blockdev/index - bootconfig - braille-console - btmrvl - cgroup-v1/index - cgroup-v2 - cifs/index - cputopology - dell_rbu - device-mapper/index - edid - efi-stub - ext4 - nfs/index - gpio/index - highuid - hw_random - initrd - iostats - java - jfs - kernel-per-CPU-kthreads - laptops/index - lcd-panel-cgram - ldm - lockup-watchdogs - LSM/index - md - media/index - mm/index - module-signing - mono - namespaces/index - numastat - parport - perf-security - pm/index - pnp - rapidio - ras - rtc - serial-console - svga - sysrq - thunderbolt - ufs - vga-softcursor - video-output - xfs +* acpi/index +* aoe/index +* auxdisplay/index +* bcache +* binderfs +* binfmt-misc +* blockdev/index +* braille-console +* btmrvl +* cgroup-v1/index +* cgroup-v2 +* cifs/index +* dell_rbu +* device-mapper/index +* edid +* efi-stub +* ext4 +* nfs/index +* gpio/index +* highuid +* hw_random +* initrd +* iostats +* java +* jfs +* kernel-per-CPU-kthreads +* laptops/index +* lcd-panel-cgram +* ldm +* LSM/index +* md +* media/index +* module-signing +* mono +* namespaces/index +* numastat +* parport +* perf-security +* pm/index +* pnp +* rapidio +* ras +* rtc +* serial-console +* svga +* thunderbolt +* ufs +* vga-softcursor +* video-output +* xfs .. 
only:: subproject and html diff --git a/Documentation/translations/zh_TW/admin-guide/init.rst b/Documentation/translations/zh_TW/admin-guide/init.rst index db3fdf6110..be6e34f5f7 100644 --- a/Documentation/translations/zh_TW/admin-guide/init.rst +++ b/Documentation/translations/zh_TW/admin-guide/init.rst @@ -9,8 +9,8 @@ 吳想成 Wu XiangCheng 胡皓文 Hu Haowen -解釋「No working init found.」啓動掛起消息 -========================================== +解釋“No working init found.”啓動掛起消息 +========================================= :作者: @@ -18,41 +18,41 @@ Cristian Souza -本文檔提供了加載初始化二進位(init binary)失敗的一些高層級原因(大致按執行 +本文檔提供了加載初始化二進制(init binary)失敗的一些高層級原因(大致按執行 順序列出)。 -1) **無法掛載根文件系統Unable to mount root FS** :請設置「debug」內核參數(在 +1) **無法掛載根文件系統Unable to mount root FS** :請設置“debug”內核參數(在 引導加載程序bootloader配置文件或CONFIG_CMDLINE)以獲取更詳細的內核消息。 -2) **初始化二進位不存在於根文件系統上init binary doesn't exist on rootfs** : +2) **初始化二進制不存在於根文件系統上init binary doesn't exist on rootfs** : 確保您的根文件系統類型正確(並且 ``root=`` 內核參數指向正確的分區);擁有 - 所需的驅動程序,例如SCSI或USB等存儲硬體;文件系統(ext3、jffs2等)是內建的 + 所需的驅動程序,例如SCSI或USB等存儲硬件;文件系統(ext3、jffs2等)是內建的 (或者作爲模塊由initrd預加載)。 -3) **控制台設備損壞Broken console device** : ``console= setup`` 中可能存在 - 衝突 --> 初始控制台不可用(initial console unavailable)。例如,由於串行 - IRQ問題(如缺少基於中斷的配置)導致的某些串行控制台不可靠。嘗試使用不同的 +3) **控制檯設備損壞Broken console device** : ``console= setup`` 中可能存在 + 衝突 --> 初始控制檯不可用(initial console unavailable)。例如,由於串行 + IRQ問題(如缺少基於中斷的配置)導致的某些串行控制檯不可靠。嘗試使用不同的 ``console= device`` 或像 ``netconsole=`` 。 -4) **二進位存在但依賴項不可用Binary exists but dependencies not available** : - 例如初始化二進位的必需庫依賴項,像 ``/lib/ld-linux.so.2`` 丟失或損壞。使用 +4) **二進制存在但依賴項不可用Binary exists but dependencies not available** : + 例如初始化二進制的必需庫依賴項,像 ``/lib/ld-linux.so.2`` 丟失或損壞。使用 ``readelf -d |grep NEEDED`` 找出需要哪些庫。 -5) **無法加載二進位Binary cannot be loaded** :請確保二進位的體系結構與您的 - 硬體匹配。例如i386不匹配x86_64,或者嘗試在ARM硬體上加載x86。如果您嘗試在 - 此處加載非二進位文件(shell腳本?),您應該確保腳本在其工作頭(shebang +5) **無法加載二進制Binary cannot be loaded** :請確保二進制的體系結構與您的 + 硬件匹配。例如i386不匹配x86_64,或者嘗試在ARM硬件上加載x86。如果您嘗試在 + 
此處加載非二進制文件(shell腳本?),您應該確保腳本在其工作頭(shebang header)行 ``#!/...`` 中指定能正常工作的解釋器(包括其庫依賴項)。在處理 - 腳本之前,最好先測試一個簡單的非腳本二進位文件,比如 ``/bin/sh`` ,並確認 + 腳本之前,最好先測試一個簡單的非腳本二進制文件,比如 ``/bin/sh`` ,並確認 它能成功執行。要了解更多信息,請將代碼添加到 ``init/main.c`` 以顯示 kernel_execve()的返回值。 -當您發現新的失敗原因時,請擴展本解釋(畢竟加載初始化二進位是一個 **關鍵** 且 +當您發現新的失敗原因時,請擴展本解釋(畢竟加載初始化二進制是一個 **關鍵** 且 艱難的過渡步驟,需要儘可能無痛地進行),然後向LKML提交一個補丁。 待辦事項: - 通過一個可以存儲 ``kernel_execve()`` 結果值的結構體數組實現各種 - ``run_init_process()`` 調用,並在失敗時通過疊代 **所有** 結果來記錄一切 + ``run_init_process()`` 調用,並在失敗時通過迭代 **所有** 結果來記錄一切 (非常重要的可用性修復)。 -- 試著使實現本身在一般情況下更有幫助,例如在受影響的地方提供額外的錯誤消息。 +- 試着使實現本身在一般情況下更有幫助,例如在受影響的地方提供額外的錯誤消息。 diff --git a/Documentation/translations/zh_TW/admin-guide/lockup-watchdogs.rst b/Documentation/translations/zh_TW/admin-guide/lockup-watchdogs.rst new file mode 100644 index 0000000000..f65b0c96e8 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/lockup-watchdogs.rst @@ -0,0 +1,67 @@ +.. include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/lockup-watchdogs.rst +:Translator: Hailong Liu + +.. 
_tw_lockup-watchdogs: + + +================================================= +Softlockup與hardlockup檢測機制(又名:nmi_watchdog) +================================================= + +Linux中內核實現了一種用以檢測系統發生softlockup和hardlockup的看門狗機制。 + +Softlockup是一種會引發系統在內核態中一直循環超過20秒(詳見下面“實現”小節)導致 +其他任務沒有機會得到運行的BUG。一旦檢測到'softlockup'發生,默認情況下系統會打 +印當前堆棧跟蹤信息並進入鎖定狀態。也可配置使其在檢測到'softlockup'後進入panic +狀態;通過sysctl命令設置“kernel.softlockup_panic”、使用內核啓動參數 +“softlockup_panic”(詳見Documentation/admin-guide/kernel-parameters.rst)以及使 +能內核編譯選項“BOOTPARAM_SOFTLOCKUP_PANIC”都可實現這種配置。 + +而'hardlockup'是一種會引發系統在內核態一直循環超過10秒鐘(詳見"實現"小節)導致其 +他中斷沒有機會運行的缺陷。與'softlockup'情況類似,除了使用sysctl命令設置 +'hardlockup_panic'、使能內核選項“BOOTPARAM_HARDLOCKUP_PANIC”以及使用內核參數 +"nmi_watchdog"(詳見:”Documentation/admin-guide/kernel-parameters.rst“)外,一旦檢 +測到'hardlockup'默認情況下系統打印當前堆棧跟蹤信息,然後進入鎖定狀態。 + +這個panic選項也可以與panic_timeout結合使用(這個panic_timeout是通過稍具迷惑性的 +sysctl命令"kernel.panic"來設置),使系統在panic指定時間後自動重啓。 + +實現 +==== + +Softlockup和hardlockup分別建立在hrtimer(高精度定時器)和perf兩個子系統上而實現。 +這也就意味着理論上任何架構只要實現了這兩個子系統就支持這兩種檢測機制。 + +Hrtimer用於週期性產生中斷並喚醒watchdog線程;NMI perf事件則以”watchdog_thresh“ +(編譯時默認初始化爲10秒,也可通過”watchdog_thresh“這個sysctl接口來進行配置修改) +爲間隔週期產生以檢測 hardlockups。如果一個CPU在這個時間段內沒有檢測到hrtimer中 +斷發生,'hardlockup 檢測器'(即NMI perf事件處理函數)將會視系統配置而選擇產生內核 +警告或者直接panic。 + +而watchdog線程本質上是一個高優先級內核線程,每調度一次就對時間戳進行一次更新。 +如果時間戳在2*watchdog_thresh(這個是softlockup的觸發門限)這段時間都未更新,那麼 +"softlockup 檢測器"(內部hrtimer定時器回調函數)會將相關的調試信息打印到系統日誌中, +然後如果系統配置了進入panic流程則進入panic,否則內核繼續執行。 + +Hrtimer定時器的週期是2*watchdog_thresh/5,也就是說在hardlockup被觸發前hrtimer有 +2~3次機會產生時鐘中斷。 + +如上所述,內核相當於爲系統管理員提供了一個可調節hrtimer定時器和perf事件週期長度 +的調節旋鈕。如何通過這個旋鈕爲特定使用場景配置一個合理的週期值要對lockups檢測的 +響應速度和lockups檢測開銷這二者之間進行權衡。 + +默認情況下所有在線cpu上都會運行一個watchdog線程。不過在內核配置了”NO_HZ_FULL“的 +情況下watchdog線程默認只會運行在管家(housekeeping)cpu上,而”nohz_full“啓動參數指 +定的cpu上則不會有watchdog線程運行。試想,如果我們允許watchdog線程在”nohz_full“指 +定的cpu上運行,這些cpu上必須得運行時鐘定時器來激發watchdog線程調度;這樣一來就會 +使”nohz_full“保護用戶程序免受內核干擾的功能失效。當然,副作用就是”nohz_full“指定 +的cpu即使在內核產生了lockup問題我們也無法檢測到。不過,至少我們可以允許watchdog 
+線程在管家(non-tickless)核上繼續運行以便我們能繼續正常的監測這些cpus上的lockups +事件。 + +不論哪種情況都可以通過sysctl命令kernel.watchdog_cpumask來對沒有運行watchdog線程 +的cpu集合進行調節。對於nohz_full而言,如果nohz_full cpu上有異常掛住的情況,通過 +這種方式打開這些cpu上的watchdog進行調試可能會有所作用。 + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_TW/admin-guide/mm/damon/index.rst new file mode 100644 index 0000000000..a472eb3c70 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/damon/index.rst @@ -0,0 +1,30 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/damon/index.rst + +:翻譯: + + 司延騰 Yanteng Si + +:校譯: + +============ +監測數據訪問 +============ + +:doc:`DAMON ` 允許輕量級的數據訪問監測。使用DAMON, +用戶可以分析他們系統的內存訪問模式,並優化它們。 + +.. toctree:: + :maxdepth: 2 + + start + usage + reclaim + lru_sort + + + + + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/damon/lru_sort.rst b/Documentation/translations/zh_TW/admin-guide/mm/damon/lru_sort.rst new file mode 100644 index 0000000000..1ffc4b6b1d --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/damon/lru_sort.rst @@ -0,0 +1,264 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
include:: ../../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/damon/lru_sort.rst + +:翻譯: + + 臧雷剛 Leigang Zang + +:校譯: + +================== +基於DAMON的LRU排序 +================== + +基於DAMON的LRU排序是一個靜態的內核模塊,旨在用於以主動的、輕量級的數據訪問模型 +爲基礎的頁面優先級處理的LRU鏈表上,以使得LRU上的數據訪問模型更爲可信。 + +哪裏需要主動的LRU排序 +===================== + +在一個大型系統中,以頁爲粒度的訪問檢測會有比較顯著的開銷,LRU通常不會主動去排序, +而是對部分特殊事件進行部分的、響應式的排序,例如:特殊的用戶請求,系統調用或者 +內存壓力。這導致,在有些場景下,LRU不能夠完美的作爲一個可信的數據訪問模型,比如 +在內存壓力下對目標內存進行回收。 + +因爲DAMON能夠儘可能準確的識別數據訪問模型,同時只引起用戶指定範圍的開銷,主動的 +執行DAMON_LRU_SORT讓LRU變得更爲可信是有益的,而且這只需要較少和可控的開銷。 + +這是如何工作的 +============== + +DAMON_LRU_SORT使用DAMON尋找熱頁(範圍內的頁面訪問頻率高於用戶指定的閾值)和冷頁 +(範圍內的頁面在超過用戶指定的時間無訪問),並提高熱頁和降低冷頁在LRU中的優先級。 +爲了避免在排序過程佔用更多的CPU計算資源,可以設置一個CPU佔用時間的約束值。在約 +束下,分別提升或者降低更多的熱頁和冷頁。系統管理員也可以配置三個內存水位以控制 +在何種條件下自動激活或者停止這種機制。 + +冷熱閾值和CPU約束的默認值是比較保守的。這意味着,在默認參數下,模塊可以廣泛且無 +副作用的使用在常見環境中,同時在只消耗一小部分CPU時間的情況下,給有內存壓力的系 +統提供一定水平的冷熱識別。 + +接口:模塊參數 +============== + +使用此特性,你首先需要確認你的系統中運行的內核在編譯時啓用了 +``CONFIG_DAMON_LRU_SORT=y``. + +爲了讓系統管理員打開或者關閉並且調節指定的系統,DAMON_LRU_SORT設計了模塊參數。 +這意味着,你可以添加 ``damon_lru_sort.=`` 到內核的啓動命令行 +參數,或者在 ``/sys/module/damon_lru_sort/parameters/`` 寫入正確的 +值。 + +下邊是每個參數的描述 + +enabled +------- + +打開或者關閉DAMON_LRU_SORT. 
+ +你可以通過設置這個參數爲 ``Y`` 來打開DAMON_LRU_SORT。設置爲 ``N`` 關閉 +DAMON_LRU_SORT。注意,在基於水位的激活的情況下,DAMON_LRU_SORT有可能不會真正去 +監測或者做LRU排序。對這種情況,參考下方關於水位的描述。 + +commit_inputs +------------- + +讓DAMON_LRU_SORT再次讀取輸入參數,除了 ``enabled`` 。 + +在DAMON_LRU_SORT運行時,新的輸入參數默認不會被應用。一旦這個參數被設置爲 ``Y`` +,DAMON_LRU_SORT會再次讀取除了 ``enabled`` 之外的參數。讀取完成後,這個參數會被 +設置爲 ``N`` 。如果在讀取時發現有無效參數,DAMON_LRU_SORT會被關閉。 + +hot_thres_access_freq +--------------------- + +熱點內存區域的訪問頻率閾值,千分比。 + +如果一個內存區域的訪問頻率大於等於這個值,DAMON_LRU_SORT把這個區域看作熱區,並 +在LRU上把這個區域標記爲已訪問,因些在內存壓力下這部分內存不會被回收。默認爲50%。 + +cold_min_age +------------ + +用於識別冷內存區域的時間閾值,單位是微秒。 + +如果一個內存區域在這個時間內未被訪問過,DAMON_LRU_SORT把這個區域看作冷區,並在 +LRU上把這個區域標記爲未訪問,因此在內存壓力下這些內存會首先被回收。默認值爲120 +秒。 + +quota_ms +-------- + +嘗試LRU鏈表排序的時間限制,單位是毫秒。 + +DAMON_LRU_SORT在一個時間窗口內(quota_reset_interval_ms)內最多嘗試這麼長時間來 +對LRU進行排序。這個可以用來作爲CPU計算資源的約束。如果值爲0,則表示無限制。 + +默認10毫秒。 + +quota_reset_interval_ms +----------------------- + +配額計時重置週期,毫秒。 + +配額計時重置週期。即,在quota_reset_interval_ms毫秒內,DAMON_LRU_SORT對LRU進行 +排序不會超過quota_ms或者quota_sz。 + +默認1秒。 + +wmarks_interval +--------------- + +水位的檢查週期,單位是微秒。 + +當DAMON_LRU_SORT使能但是由於水位而不活躍時檢查水位前最小的等待時間。默認值5秒。 + +wmarks_high +----------- + +空閒內存高水位,千分比。 + +如果空閒內存水位高於這個值,DAMON_LRU_SORT停止工作,不做任何事,除了週期性的檢 +查水位。默認200(20%)。 + +wmarks_mid +---------- + +空閒內存中間水位,千分比。 + +如果空閒內存水位在這個值與低水位之間,DAMON_LRU_SORT開始工作,開始檢測並對LRU鏈 +表進行排序。默認150(15%)。 + +wmarks_low +---------- + +空閒內存低水位,千分比。 + +如果空閒內存小於這個值,DAMON_LRU_SORT不再工作,不做任何事,除了週期性的檢查水 +線。默認50(5%)。 + +sample_interval +--------------- + +監測的採樣週期,微秒。 + +DAMON對冷內存監測的採樣週期。更多細節請參考DAMON文檔 (:doc:`usage`) 。默認5 +毫秒。 + +aggr_interval +------------- + +監測的收集週期,微秒。 + +DAMON對冷內存進行收集的時間週期。更多細節請參考DAMON文檔 (:doc:`usage`) 。默認 +100毫秒。 + +min_nr_regions +-------------- + +最小監測區域數量。 + +對冷內存區域監測的最小數量。這個值可以作爲監測質量的下限。不過,這個值設置的過 +大會增加開銷。更多細節請參考DAMON文檔 (:doc:`usage`) 。默認值爲10。 + +max_nr_regions +-------------- + +最大監測區域數量。 + +對冷內存區域監測的最大數量。這個值可以作爲監測質量的上限。然而,這個值設置的過 +低會導致監測結果變差。更多細節請參考DAMON文檔 (:doc:`usage`) 。默認值爲1000。 + +monitor_region_start +-------------------- 
+ +目標內存區域的起始物理地址。 + +DAMON_LRU_SORT要處理的目標內存區域的起始物理地址。默認,使用系統最大內存。 + +monitor_region_end +------------------ + +目標內存區域的結束物理地址。 + +DAMON_LRU_SORT要處理的目標內存區域的結束物理地址。默認,使用系統最大內存。 + +kdamond_pid +----------- + +DAMON線程的PID。 + +如果DAMON_LRU_SORT是使能的,這個表示任務線程的PID。其它情況爲-1。 + +nr_lru_sort_tried_hot_regions +----------------------------- + +被嘗試進行LRU排序的熱內存區域的數量。 + +bytes_lru_sort_tried_hot_regions +-------------------------------- + +被嘗試進行LRU排序的熱內存區域的大小(字節)。 + +nr_lru_sorted_hot_regions +------------------------- + +成功進行LRU排序的熱內存區域的數量。 + +bytes_lru_sorted_hot_regions +---------------------------- + +成功進行LRU排序的熱內存區域的大小(字節)。 + +nr_hot_quota_exceeds +-------------------- + +熱區域時間約束超過限制的次數。 + +nr_lru_sort_tried_cold_regions +------------------------------ + +被嘗試進行LRU排序的冷內存區域的數量。 + +bytes_lru_sort_tried_cold_regions +--------------------------------- + +被嘗試進行LRU排序的冷內存區域的大小(字節)。 + +nr_lru_sorted_cold_regions +-------------------------- + +成功進行LRU排序的冷內存區域的數量。 + +bytes_lru_sorted_cold_regions +----------------------------- + +成功進行LRU排序的冷內存區域的大小(字節)。 + +nr_cold_quota_exceeds +--------------------- + +冷區域時間約束超過限制的次數。 + +Example +======= + +如下是一個運行時的命令示例,使DAMON_LRU_SORT查找訪問頻率超過50%的區域並對其進行 +LRU的優先級的提升,同時降低那些超過120秒無人訪問的內存區域的優先級。優先級的處 +理被限制在最多1%的CPU以避免DAMON_LRU_SORT消費過多CPU時間。在系統空閒內存超過50% +時DAMON_LRU_SORT停止工作,並在低於40%時重新開始工作。如果DAMON_RECLAIM沒有取得 +進展且空閒內存低於20%,再次讓DAMON_LRU_SORT停止工作,以此回退到以LRU鏈表爲基礎 +以頁面爲單位的內存回收上。 :: + + # cd /sys/modules/damon_lru_sort/parameters + # echo 500 > hot_thres_access_freq + # echo 120000000 > cold_min_age + # echo 10 > quota_ms + # echo 1000 > quota_reset_interval_ms + # echo 500 > wmarks_high + # echo 400 > wmarks_mid + # echo 200 > wmarks_low + # echo Y > enabled + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_TW/admin-guide/mm/damon/reclaim.rst new file mode 100644 index 0000000000..efed29c40e --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/damon/reclaim.rst @@ -0,0 +1,229 @@ +.. 
SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/damon/reclaim.rst + +:翻譯: + + 司延騰 Yanteng Si + +:校譯: + +=============== +基於DAMON的回收 +=============== + +基於DAMON的回收(DAMON_RECLAIM)是一個靜態的內核模塊,旨在用於輕度內存壓力下的主動和輕 +量級的回收。它的目的不是取代基於LRU列表的頁面回收,而是有選擇地用於不同程度的內存壓力和要 +求。 + +哪些地方需要主動回收? +====================== + +在一般的內存超量使用(over-committed systems,虛擬化相關術語)的系統上,主動回收冷頁 +有助於節省內存和減少延遲高峯,這些延遲是由直接回收進程或kswapd的CPU消耗引起的,同時只產 +生最小的性能下降 [1]_ [2]_ 。 + +基於空閒頁報告 [3]_ 的內存過度承諾的虛擬化系統就是很好的例子。在這樣的系統中,客戶機 +向主機報告他們的空閒內存,而主機則將報告的內存重新分配給其他客戶。因此,系統的內存得到了充 +分的利用。然而,客戶可能不那麼節省內存,主要是因爲一些內核子系統和用戶空間應用程序被設計爲 +使用儘可能多的內存。然後,客戶機可能只向主機報告少量的內存是空閒的,導致系統的內存利用率下降。 +在客戶中運行主動回收可以緩解這個問題。 + +它是如何工作的? +================ + +DAMON_RECLAIM找到在特定時間內沒有被訪問的內存區域並分頁。爲了避免它在分頁操作中消耗過多 +的CPU,可以配置一個速度限制。在這個速度限制下,它首先分頁出那些沒有被訪問過的內存區域。系 +統管理員還可以配置在什麼情況下這個方案應該自動激活和停用三個內存壓力水位。 + +接口: 模塊參數 +============== + +要使用這個功能,你首先要確保你的系統運行在一個以 ``CONFIG_DAMON_RECLAIM=y`` 構建的內 +核上。 + +爲了讓系統管理員啓用或禁用它,並爲給定的系統進行調整,DAMON_RECLAIM利用了模塊參數。也就 +是說,你可以把 ``damon_reclaim.=`` 放在內核啓動命令行上,或者把 +適當的值寫入 ``/sys/module/damon_reclaim/parameters/`` 文件。 + +下面是每個參數的描述。 + +enabled +------- + +啓用或禁用DAMON_RECLAIM。 + +你可以通過把這個參數的值設置爲 ``Y`` 來啓用DAMON_RECLAIM,把它設置爲 ``N`` 可以禁用 +DAMON_RECLAIM。注意,由於基於水位的激活條件,DAMON_RECLAIM可能不會進行真正的監測和回收。 +這一點請參考下面關於水位參數的描述。 + +min_age +------- + +識別冷內存區域的時間閾值,單位是微秒。 + +如果一個內存區域在這個時間或更長的時間內沒有被訪問,DAMON_RECLAIM會將該區域識別爲冷的, +並回收它。 + +默認爲120秒。 + +quota_ms +-------- + +回收的時間限制,以毫秒爲單位。 + +DAMON_RECLAIM 試圖在一個時間窗口(quota_reset_interval_ms)內只使用到這個時間,以 +嘗試回收冷頁。這可以用來限制DAMON_RECLAIM的CPU消耗。如果該值爲零,則該限制被禁用。 + +默認爲10ms。 + +quota_sz +-------- + +回收的內存大小限制,單位爲字節。 + +DAMON_RECLAIM 收取在一個時間窗口(quota_reset_interval_ms)內試圖回收的內存量,並 +使其不超過這個限制。這可以用來限制CPU和IO的消耗。如果該值爲零,則限制被禁用。 + +默認情況下是128 MiB。 + +quota_reset_interval_ms +----------------------- + +時間/大小配額收取重置間隔,單位爲毫秒。 + +時間(quota_ms)和大小(quota_sz)的配額的目標重置間隔。也就是說,DAMON_RECLAIM在 +嘗試回收‘不’超過quota_ms毫秒或quota_sz字節的內存。 + +默認爲1秒。 + +wmarks_interval +--------------- + 
+當DAMON_RECLAIM被啓用但由於其水位規則而不活躍時,在檢查水位之前的最小等待時間。 + +wmarks_high +----------- + +高水位的可用內存率(千分比)。 + +如果系統的可用內存(以每千字節爲單位)高於這個數值,DAMON_RECLAIM就會變得不活躍,所以 +它什麼也不做,只是定期檢查水位。 + +wmarks_mid +---------- + +中間水位的可用內存率(千分比)。 + +如果系統的空閒內存(以每千字節爲單位)在這個和低水位線之間,DAMON_RECLAIM就會被激活, +因此開始監測和回收。 + +wmarks_low +---------- + +低水位的可用內存率(千分比)。 + +如果系統的空閒內存(以每千字節爲單位)低於這個數值,DAMON_RECLAIM就會變得不活躍,所以 +它除了定期檢查水位外什麼都不做。在這種情況下,系統會退回到基於LRU列表的頁面粒度回收邏輯。 + +sample_interval +--------------- + +監測的採樣間隔,單位是微秒。 + +DAMON用於監測冷內存的採樣間隔。更多細節請參考DAMON文檔 (:doc:`usage`) 。 + +aggr_interval +------------- + +監測的聚集間隔,單位是微秒。 + +DAMON對冷內存監測的聚集間隔。更多細節請參考DAMON文檔 (:doc:`usage`)。 + +min_nr_regions +-------------- + +監測區域的最小數量。 + +DAMON用於冷內存監測的最小監測區域數。這可以用來設置監測質量的下限。但是,設 +置的太高可能會導致監測開銷的增加。更多細節請參考DAMON文檔 (:doc:`usage`) 。 + +max_nr_regions +-------------- + +監測區域的最大數量。 + +DAMON用於冷內存監測的最大監測區域數。這可以用來設置監測開銷的上限值。但是, +設置得太低可能會導致監測質量不好。更多細節請參考DAMON文檔 (:doc:`usage`) 。 + +monitor_region_start +-------------------- + +目標內存區域的物理地址起點。 + +DAMON_RECLAIM將對其進行工作的內存區域的起始物理地址。也就是說,DAMON_RECLAIM +將在這個區域中找到冷的內存區域並進行回收。默認情況下,該區域使用最大系統內存區。 + +monitor_region_end +------------------ + +目標內存區域的結束物理地址。 + +DAMON_RECLAIM將對其進行工作的內存區域的末端物理地址。也就是說,DAMON_RECLAIM將 +在這個區域內找到冷的內存區域並進行回收。默認情況下,該區域使用最大系統內存區。 + +kdamond_pid +----------- + +DAMON線程的PID。 + +如果DAMON_RECLAIM被啓用,這將成爲工作線程的PID。否則,爲-1。 + +nr_reclaim_tried_regions +------------------------ + +試圖通過DAMON_RECLAIM回收的內存區域的數量。 + +bytes_reclaim_tried_regions +--------------------------- + +試圖通過DAMON_RECLAIM回收的內存區域的總字節數。 + +nr_reclaimed_regions +-------------------- + +通過DAMON_RECLAIM成功回收的內存區域的數量。 + +bytes_reclaimed_regions +----------------------- + +通過DAMON_RECLAIM成功回收的內存區域的總字節數。 + +nr_quota_exceeds +---------------- + +超過時間/空間配額限制的次數。 + +例子 +==== + +下面的運行示例命令使DAMON_RECLAIM找到30秒或更長時間沒有訪問的內存區域並“回收”。 
+爲了避免DAMON_RECLAIM在分頁操作中消耗過多的CPU時間,回收被限制在每秒1GiB以內。 +它還要求DAMON_RECLAIM在系統的可用內存率超過50%時不做任何事情,但如果它低於40%時 +就開始真正的工作。如果DAMON_RECLAIM沒有取得進展,因此空閒內存率低於20%,它會要求 +DAMON_RECLAIM再次什麼都不做,這樣我們就可以退回到基於LRU列表的頁面粒度回收了:: + + # cd /sys/module/damon_reclaim/parameters + # echo 30000000 > min_age + # echo $((1 * 1024 * 1024 * 1024)) > quota_sz + # echo 1000 > quota_reset_interval_ms + # echo 500 > wmarks_high + # echo 400 > wmarks_mid + # echo 200 > wmarks_low + # echo Y > enabled + +.. [1] https://research.google/pubs/pub48551/ +.. [2] https://lwn.net/Articles/787611/ +.. [3] https://www.kernel.org/doc/html/latest/mm/free_page_reporting.html + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst b/Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst new file mode 100644 index 0000000000..1822956be0 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst @@ -0,0 +1,125 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/damon/start.rst + +:翻譯: + + 司延騰 Yanteng Si + +:校譯: + +======== +入門指南 +======== + +本文通過演示DAMON的默認用戶空間工具,簡要地介紹了如何使用DAMON。請注意,爲了簡潔 +起見,本文檔只描述了它的部分功能。更多細節請參考該工具的使用文檔。 +`doc `_ . + + +前提條件 +======== + +內核 +---- + +首先,你要確保你當前系統中跑的內核構建時選定了這個功能選項 ``CONFIG_DAMON_*=y``. 
+ + +用戶空間工具 +------------ + +在演示中,我們將使用DAMON的默認用戶空間工具,稱爲DAMON Operator(DAMO)。它可以在 +https://github.com/awslabs/damo找到。下面的例子假設DAMO在你的$PATH上。當然,但 +這並不是強制性的。 + +因爲DAMO使用了DAMON的sysfs接口(詳情請參考:doc:`usage`),你應該確保 +:doc:`sysfs ` 被掛載。 + +記錄數據訪問模式 +================ + +下面的命令記錄了一個程序的內存訪問模式,並將監測結果保存到文件中。 :: + + $ git clone https://github.com/sjp38/masim + $ cd masim; make; ./masim ./configs/zigzag.cfg & + $ sudo damo record -o damon.data $(pidof masim) + +命令的前兩行下載了一個人工內存訪問生成器程序並在後臺運行。生成器將重複地逐一訪問兩個 +100 MiB大小的內存區域。你可以用你的真實工作負載來代替它。最後一行要求 ``damo`` 將 +訪問模式記錄在 ``damon.data`` 文件中。 + + +將記錄的模式可視化 +================== + +你可以在heatmap中直觀地看到這種模式,顯示哪個內存區域(X軸)何時被訪問(Y軸)以及訪 +問的頻率(數字)。:: + + $ sudo damo report heats --heatmap stdout + 22222222222222222222222222222222222222211111111111111111111111111111111111111100 + 44444444444444444444444444444444444444434444444444444444444444444444444444443200 + 44444444444444444444444444444444444444433444444444444444444444444444444444444200 + 33333333333333333333333333333333333333344555555555555555555555555555555555555200 + 33333333333333333333333333333333333344444444444444444444444444444444444444444200 + 22222222222222222222222222222222222223355555555555555555555555555555555555555200 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + 33333333333333333333333333333333333333355555555555555555555555555555555555555200 + 88888888888888888888888888888888888888600000000000000000000000000000000000000000 + 88888888888888888888888888888888888888600000000000000000000000000000000000000000 + 33333333333333333333333333333333333333444444444444444444444444444444444444443200 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + [...] 
+ # access_frequency: 0 1 2 3 4 5 6 7 8 9 + # x-axis: space (139728247021568-139728453431248: 196.848 MiB) + # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s) + # resolution: 80x40 (2.461 MiB and 1.758 s for each character) + +你也可以直觀地看到工作集的大小分佈,按大小排序。:: + + $ sudo damo report wss --range 0 101 10 + # + # target_id 18446632103789443072 + # avr: 107.708 MiB + 0 0 B | | + 10 95.328 MiB |**************************** | + 20 95.332 MiB |**************************** | + 30 95.340 MiB |**************************** | + 40 95.387 MiB |**************************** | + 50 95.387 MiB |**************************** | + 60 95.398 MiB |**************************** | + 70 95.398 MiB |**************************** | + 80 95.504 MiB |**************************** | + 90 190.703 MiB |********************************************************* | + 100 196.875 MiB |***********************************************************| + +在上述命令中使用 ``--sortby`` 選項,可以顯示工作集的大小是如何按時間順序變化的。:: + + $ sudo damo report wss --range 0 101 10 --sortby time + # + # target_id 18446632103789443072 + # avr: 107.708 MiB + 0 3.051 MiB | | + 10 190.703 MiB |***********************************************************| + 20 95.336 MiB |***************************** | + 30 95.328 MiB |***************************** | + 40 95.387 MiB |***************************** | + 50 95.332 MiB |***************************** | + 60 95.320 MiB |***************************** | + 70 95.398 MiB |***************************** | + 80 95.398 MiB |***************************** | + 90 95.340 MiB |***************************** | + 100 95.398 MiB |***************************** | + + +數據訪問模式感知的內存管理 +========================== + +以下三個命令使每一個大小>=4K的內存區域在你的工作負載中沒有被訪問>=60秒,就會被換掉。 :: + + $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme + $ echo "4K max 0 0 60s max pageout" >> test_scheme + $ damo schemes -c test_scheme + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst 
b/Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst new file mode 100644 index 0000000000..6dee719a32 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst @@ -0,0 +1,592 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/damon/usage.rst + +:翻譯: + + 司延騰 Yanteng Si + +:校譯: + +======== +詳細用法 +======== + +DAMON 爲不同的用戶提供了下面這些接口。 + +- *DAMON用戶空間工具。* + `這 `_ 爲有這特權的人, 如系統管理員,希望有一個剛好 + 可以工作的人性化界面。 + 使用它,用戶可以以人性化的方式使用DAMON的主要功能。不過,它可能不會爲特殊情況進行高度調整。 + 它同時支持虛擬和物理地址空間的監測。更多細節,請參考它的 `使用文檔 + `_。 +- *sysfs接口。* + :ref:`這 ` 是爲那些希望更高級的使用DAMON的特權用戶空間程序員準備的。 + 使用它,用戶可以通過讀取和寫入特殊的sysfs文件來使用DAMON的主要功能。因此,你可以編寫和使 + 用你個性化的DAMON sysfs包裝程序,代替你讀/寫sysfs文件。 `DAMON用戶空間工具 + `_ 就是這種程序的一個例子 它同時支持虛擬和物理地址 + 空間的監測。注意,這個界面只提供簡單的監測結果 :ref:`統計 `。對於詳細的監測 + 結果,DAMON提供了一個:ref:`跟蹤點 `。 +- *debugfs interface.* + :ref:`這 ` 幾乎與:ref:`sysfs interface ` 接 + 口相同。這將在下一個LTS內核發佈後被移除,所以用戶應該轉移到 + :ref:`sysfs interface `。 +- *內核空間編程接口。* + :doc:`這 ` 這是爲內核空間程序員準備的。使用它,用戶可以通過爲你編寫內 + 核空間的DAMON應用程序,最靈活有效地利用DAMON的每一個功能。你甚至可以爲各種地址空間擴展DAMON。 + 詳細情況請參考接口 :doc:`文件 `。 + +sysfs接口 +========= +DAMON的sysfs接口是在定義 ``CONFIG_DAMON_SYSFS`` 時建立的。它在其sysfs目錄下創建多 +個目錄和文件, ``/kernel/mm/damon/`` 。你可以通過對該目錄下的文件進行寫入和 +讀取來控制DAMON。 + +對於一個簡短的例子,用戶可以監測一個給定工作負載的虛擬地址空間,如下所示:: + + # cd /sys/kernel/mm/damon/admin/ + # echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts + # echo vaddr > kdamonds/0/contexts/0/operations + # echo 1 > kdamonds/0/contexts/0/targets/nr_targets + # echo $(pidof ) > kdamonds/0/contexts/0/targets/0/pid_target + # echo on > kdamonds/0/state + +文件層次結構 +------------ + +DAMON sysfs接口的文件層次結構如下圖所示。在下圖中,父子關係用縮進表示,每個目錄有 +``/`` 後綴,每個目錄中的文件用逗號(",")分開。 :: + + /sys/kernel/mm/damon/admin + │ kdamonds/nr_kdamonds + │ │ 0/state,pid + │ │ │ contexts/nr_contexts + │ │ │ │ 0/operations + │ │ │ │ │ monitoring_attrs/ + │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us + │ │ │ │ │ │ nr_regions/min,max + │ │ │ │ │ 
targets/nr_targets + │ │ │ │ │ │ 0/pid_target + │ │ │ │ │ │ │ regions/nr_regions + │ │ │ │ │ │ │ │ 0/start,end + │ │ │ │ │ │ │ │ ... + │ │ │ │ │ │ ... + │ │ │ │ │ schemes/nr_schemes + │ │ │ │ │ │ 0/action + │ │ │ │ │ │ │ access_pattern/ + │ │ │ │ │ │ │ │ sz/min,max + │ │ │ │ │ │ │ │ nr_accesses/min,max + │ │ │ │ │ │ │ │ age/min,max + │ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms + │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil + │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low + │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds + │ │ │ │ │ │ │ tried_regions/ + │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age + │ │ │ │ │ │ │ │ ... + │ │ │ │ │ │ ... + │ │ │ │ ... + │ │ ... + +根 +-- + +DAMON sysfs接口的根是 ``/kernel/mm/damon/`` ,它有一個名爲 ``admin`` 的 +目錄。該目錄包含特權用戶空間程序控制DAMON的文件。擁有根權限的用戶空間工具或daemons可以 +使用這個目錄。 + +kdamonds/ +--------- + +與監測相關的信息包括請求規格和結果被稱爲DAMON上下文。DAMON用一個叫做kdamond的內核線程 +執行每個上下文,多個kdamonds可以並行運行。 + +在 ``admin`` 目錄下,有一個目錄,即``kdamonds``,它有控制kdamonds的文件存在。在開始 +時,這個目錄只有一個文件,``nr_kdamonds``。向該文件寫入一個數字(``N``),就會創建名爲 +``0`` 到 ``N-1`` 的子目錄數量。每個目錄代表每個kdamond。 + +kdamonds// +------------- + +在每個kdamond目錄中,存在兩個文件(``state`` 和 ``pid`` )和一個目錄( ``contexts`` )。 + +讀取 ``state`` 時,如果kdamond當前正在運行,則返回 ``on`` ,如果沒有運行則返回 ``off`` 。 +寫入 ``on`` 或 ``off`` 使kdamond處於狀態。向 ``state`` 文件寫 ``update_schemes_stats`` , +更新kdamond的每個基於DAMON的操作方案的統計文件的內容。關於統計信息的細節,請參考 +:ref:`stats section `. 
將 ``update_schemes_tried_regions`` 寫到 +``state`` 文件,爲kdamond的每個基於DAMON的操作方案,更新基於DAMON的操作方案動作的嘗試區域目錄。 +將`clear_schemes_tried_regions`寫入`state`文件,清除kdamond的每個基於DAMON的操作方案的動作 +嘗試區域目錄。 關於基於DAMON的操作方案動作嘗試區域目錄的細節,請參考:ref:tried_regions 部分 +`。 + +如果狀態爲 ``on``,讀取 ``pid`` 顯示kdamond線程的pid。 + +``contexts`` 目錄包含控制這個kdamond要執行的監測上下文的文件。 + +kdamonds//contexts/ +---------------------- + +在開始時,這個目錄只有一個文件,即 ``nr_contexts`` 。向該文件寫入一個數字( ``N`` ),就會創 +建名爲``0`` 到 ``N-1`` 的子目錄數量。每個目錄代表每個監測背景。目前,每個kdamond只支持 +一個上下文,所以只有 ``0`` 或 ``1`` 可以被寫入文件。 + +contexts// +------------- + +在每個上下文目錄中,存在一個文件(``operations``)和三個目錄(``monitoring_attrs``, +``targets``, 和 ``schemes``)。 + +DAMON支持多種類型的監測操作,包括對虛擬地址空間和物理地址空間的監測。你可以通過向文件 +中寫入以下關鍵詞之一,並從文件中讀取,來設置和獲取DAMON將爲上下文使用何種類型的監測操作。 + + - vaddr: 監測特定進程的虛擬地址空間 + - paddr: 監視系統的物理地址空間 + +contexts//monitoring_attrs/ +------------------------------ + +用於指定監測屬性的文件,包括所需的監測質量和效率,都在 ``monitoring_attrs`` 目錄中。 +具體來說,這個目錄下有兩個目錄,即 ``intervals`` 和 ``nr_regions`` 。 + +在 ``intervals`` 目錄下,存在DAMON的採樣間隔(``sample_us``)、聚集間隔(``aggr_us``) +和更新間隔(``update_us``)三個文件。你可以通過寫入和讀出這些文件來設置和獲取微秒級的值。 + +在 ``nr_regions`` 目錄下,有兩個文件分別用於DAMON監測區域的下限和上限(``min`` 和 ``max`` ), +這兩個文件控制着監測的開銷。你可以通過向這些文件的寫入和讀出來設置和獲取這些值。 + +關於間隔和監測區域範圍的更多細節,請參考設計文件 (:doc:`/mm/damon/design`)。 + +contexts//targets/ +--------------------- + +在開始時,這個目錄只有一個文件 ``nr_targets`` 。向該文件寫入一個數字(``N``),就可以創建 +名爲 ``0`` 到 ``N-1`` 的子目錄的數量。每個目錄代表每個監測目標。 + +targets// +------------ + +在每個目標目錄中,存在一個文件(``pid_target``)和一個目錄(``regions``)。 + +如果你把 ``vaddr`` 寫到 ``contexts//operations`` 中,每個目標應該是一個進程。你 +可以通過將進程的pid寫到 ``pid_target`` 文件中來指定DAMON的進程。 + +targets//regions +------------------- + +當使用 ``vaddr`` 監測操作集時( ``vaddr`` 被寫入 ``contexts//operations`` 文 +件),DAMON自動設置和更新監測目標區域,這樣就可以覆蓋目標進程的整個內存映射。然而,用戶可 +能希望將初始監測區域設置爲特定的地址範圍。 + +相反,當使用 ``paddr`` 監測操作集時,DAMON不會自動設置和更新監測目標區域( ``paddr`` +被寫入 ``contexts//operations`` 中)。因此,在這種情況下,用戶應該自己設置監測目標 +區域。 + +在這種情況下,用戶可以按照自己的意願明確設置初始監測目標區域,將適當的值寫入該目錄下的文件。 + +開始時,這個目錄只有一個文件, ``nr_regions`` 。向該文件寫入一個數字(``N``),就可以創 
+建名爲 ``0`` 到 ``N-1`` 的子目錄。每個目錄代表每個初始監測目標區域。 + +regions// +------------ + +在每個區域目錄中,你會發現兩個文件( ``start`` 和 ``end`` )。你可以通過向文件寫入 +和從文件中讀出,分別設置和獲得初始監測目標區域的起始和結束地址。 + +每個區域不應該與其他區域重疊。 目錄“N”的“結束”應等於或小於目錄“N+1”的“開始”。 + +contexts//schemes/ +--------------------- + +對於一般的基於DAMON的數據訪問感知的內存管理優化,用戶通常希望系統對特定訪問模式的內存區 +域應用內存管理操作。DAMON從用戶那裏接收這種形式化的操作方案,並將這些方案應用於目標內存 +區域。用戶可以通過讀取和寫入這個目錄下的文件來獲得和設置這些方案。 + +在開始時,這個目錄只有一個文件,``nr_schemes``。向該文件寫入一個數字(``N``),就可以 +創建名爲``0``到``N-1``的子目錄的數量。每個目錄代表每個基於DAMON的操作方案。 + +schemes// +------------ + +在每個方案目錄中,存在五個目錄(``access_pattern``、``quotas``、``watermarks``、 +``stats`` 和 ``tried_regions``)和一個文件(``action``)。 + +``action`` 文件用於設置和獲取你想應用於具有特定訪問模式的內存區域的動作。可以寫入文件 +和從文件中讀取的關鍵詞及其含義如下。 + + - ``willneed``: 對有 ``MADV_WILLNEED`` 的區域調用 ``madvise()`` 。 + - ``cold``: 對具有 ``MADV_COLD`` 的區域調用 ``madvise()`` 。 + - ``pageout``: 爲具有 ``MADV_PAGEOUT`` 的區域調用 ``madvise()`` 。 + - ``hugepage``: 爲帶有 ``MADV_HUGEPAGE`` 的區域調用 ``madvise()`` 。 + - ``nohugepage``: 爲帶有 ``MADV_NOHUGEPAGE`` 的區域調用 ``madvise()``。 + - ``lru_prio``: 在其LRU列表上對區域進行優先排序。 + - ``lru_deprio``: 對區域的LRU列表進行降低優先處理。 + - ``stat``: 什麼都不做,只計算統計數據 + +schemes//access_pattern/ +--------------------------- + +每個基於DAMON的操作方案的目標訪問模式由三個範圍構成,包括以字節爲單位的區域大小、每個 +聚合區間的監測訪問次數和區域年齡的聚合區間數。 + +在 ``access_pattern`` 目錄下,存在三個目錄( ``sz``, ``nr_accesses``, 和 ``age`` ), +每個目錄有兩個文件(``min`` 和 ``max`` )。你可以通過向 ``sz``, ``nr_accesses``, 和 +``age`` 目錄下的 ``min`` 和 ``max`` 文件分別寫入和讀取來設置和獲取給定方案的訪問模式。 + +schemes//quotas/ +------------------- + +每個 ``動作`` 的最佳 ``目標訪問模式`` 取決於工作負載,所以不容易找到。更糟糕的是,將某些動作 +的方案設置得過於激進會造成嚴重的開銷。爲了避免這種開銷,用戶可以爲每個方案限制時間和大小配額。 +具體來說,用戶可以要求DAMON儘量只使用特定的時間(``時間配額``)來應用動作,並且在給定的時間間 +隔(``重置間隔``)內,只對具有目標訪問模式的內存區域應用動作,而不使用特定數量(``大小配額``)。 + +當預計超過配額限制時,DAMON會根據 ``目標訪問模式`` 的大小、訪問頻率和年齡,對找到的內存區域 +進行優先排序。爲了進行個性化的優先排序,用戶可以爲這三個屬性設置權重。 + +在 ``quotas`` 目錄下,存在三個文件(``ms``, ``bytes``, ``reset_interval_ms``)和一個 +目錄(``weights``),其中有三個文件(``sz_permil``, ``nr_accesses_permil``, 和 +``age_permil``)。 + +你可以設置以毫秒爲單位的 ``時間配額`` ,以字節爲單位的 ``大小配額`` ,以及以毫秒爲單位的 ``重 
+置間隔`` ,分別向這三個文件寫入數值。你還可以通過向 ``weights`` 目錄下的三個文件寫入數值來設 +置大小、訪問頻率和年齡的優先權,單位爲千分之一。 + +schemes//watermarks/ +----------------------- + +爲了便於根據系統狀態激活和停用每個方案,DAMON提供了一個稱爲水位的功能。該功能接收五個值,稱爲 +``度量`` 、``間隔`` 、``高`` 、``中`` 、``低`` 。``度量值`` 是指可以測量的系統度量值,如 +自由內存比率。如果系統的度量值 ``高`` 於高值或 ``低`` 於低值,則該方案被停用。如果 +該值低於 ``中`` ,則該方案被激活。 + +在水位目錄下,存在五個文件(``metric``, ``interval_us``,``high``, ``mid``, and ``low``) +用於設置每個值。你可以通過向這些文件的寫入來分別設置和獲取這五個值。 + +可以寫入 ``metric`` 文件的關鍵詞和含義如下。 + + - none: 忽略水位 + - free_mem_rate: 系統的自由內存率(千分比)。 + +``interval`` 應以微秒爲單位寫入。 + +schemes//stats/ +------------------ + +DAMON統計每個方案被嘗試應用的區域的總數量和字節數,每個方案被成功應用的區域的兩個數字,以及 +超過配額限制的總數量。這些統計數據可用於在線分析或調整方案。 + +可以通過讀取 ``stats`` 目錄下的文件(``nr_tried``, ``sz_tried``, ``nr_applied``, +``sz_applied``, 和 ``qt_exceeds``))分別檢索這些統計數據。這些文件不是實時更新的,所以 +你應該要求DAMON sysfs接口通過在相關的 ``kdamonds//state`` 文件中寫入一個特殊的關鍵字 +``update_schemes_stats`` 來更新統計信息的文件內容。 + +schemes//tried_regions/ +-------------------------- + +當一個特殊的關鍵字 ``update_schemes_tried_regions`` 被寫入相關的 ``kdamonds//state`` +文件時,DAMON會在這個目錄下創建從 ``0`` 開始命名的整數目錄。每個目錄包含的文件暴露了關於每個 +內存區域的詳細信息,在下一個 :ref:`聚集區間 `,相應的方案的 ``動作`` +已經嘗試在這個目錄下應用。這些信息包括地址範圍、``nr_accesses`` 以及區域的 ``年齡`` 。 + +當另一個特殊的關鍵字 ``clear_schemes_tried_regions`` 被寫入相關的 ``kdamonds//state`` +文件時,這些目錄將被刪除。 + +tried_regions// +------------------ + +在每個區域目錄中,你會發現四個文件(``start``, ``end``, ``nr_accesses``, and ``age``)。 +讀取這些文件將顯示相應的基於DAMON的操作方案 ``動作`` 試圖應用的區域的開始和結束地址、``nr_accesses`` +和 ``年齡`` 。 + +用例 +~~~~ + +下面的命令應用了一個方案:”如果一個大小爲[4KiB, 8KiB]的內存區域在[10, 20]的聚合時間間隔內 +顯示出每一個聚合時間間隔[0, 5]的訪問量,請分頁該區域。對於分頁,每秒最多隻能使用10ms,而且每 +秒分頁不能超過1GiB。在這一限制下,首先分頁出具有較長年齡的內存區域。另外,每5秒鐘檢查一次系統 +的可用內存率,當可用內存率低於50%時開始監測和分頁,但如果可用內存率大於60%,或低於30%,則停 +止監測。“ :: + + # cd /kernel/mm/damon/admin + # # populate directories + # echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts; + # echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes + # cd kdamonds/0/contexts/0/schemes/0 + # # set the basic access pattern and the action + # echo 4096 > 
access_pattern/sz/min + # echo 8192 > access_pattern/sz/max + # echo 0 > access_pattern/nr_accesses/min + # echo 5 > access_pattern/nr_accesses/max + # echo 10 > access_pattern/age/min + # echo 20 > access_pattern/age/max + # echo pageout > action + # # set quotas + # echo 10 > quotas/ms + # echo $((1024*1024*1024)) > quotas/bytes + # echo 1000 > quotas/reset_interval_ms + # # set watermark + # echo free_mem_rate > watermarks/metric + # echo 5000000 > watermarks/interval_us + # echo 600 > watermarks/high + # echo 500 > watermarks/mid + # echo 300 > watermarks/low + +請注意,我們強烈建議使用用戶空間的工具,如 `damo `_ , +而不是像上面那樣手動讀寫文件。以上只是一個例子。 + +debugfs接口 +=========== + +.. note:: + + DAMON debugfs接口將在下一個LTS內核發佈後被移除,所以用戶應該轉移到 + :ref:`sysfs接口`。 + +DAMON導出了八個文件, ``attrs``, ``target_ids``, ``init_regions``, +``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` 和 +``rm_contexts`` under its debugfs directory, ``/damon/``. + + +屬性 +---- + +用戶可以通過讀取和寫入 ``attrs`` 文件獲得和設置 ``採樣間隔`` 、 ``聚集間隔`` 、 ``更新間隔`` +以及監測目標區域的最小/最大數量。要詳細瞭解監測屬性,請參考 `:doc:/mm/damon/design` 。例如, +下面的命令將這些值設置爲5ms、100ms、1000ms、10和1000,然後再次檢查:: + + # cd /damon + # echo 5000 100000 1000000 10 1000 > attrs + # cat attrs + 5000 100000 1000000 10 1000 + + +目標ID +------ + +一些類型的地址空間支持多個監測目標。例如,虛擬內存地址空間的監測可以有多個進程作爲監測目標。用戶 +可以通過寫入目標的相關id值來設置目標,並通過讀取 ``target_ids`` 文件來獲得當前目標的id。在監 +測虛擬地址空間的情況下,這些值應該是監測目標進程的pid。例如,下面的命令將pid爲42和4242的進程設 +爲監測目標,並再次檢查:: + + # cd /damon + # echo 42 4242 > target_ids + # cat target_ids + 42 4242 + +用戶還可以通過在文件中寫入一個特殊的關鍵字 "paddr\n" 來監測系統的物理內存地址空間。因爲物理地 +址空間監測不支持多個目標,讀取文件會顯示一個假值,即 ``42`` ,如下圖所示:: + + # cd /damon + # echo paddr > target_ids + # cat target_ids + 42 + +請注意,設置目標ID並不啓動監測。 + + +初始監測目標區域 +---------------- + +在虛擬地址空間監測的情況下,DAMON自動設置和更新監測的目標區域,這樣就可以覆蓋目標進程的整個 +內存映射。然而,用戶可能希望將監測區域限制在特定的地址範圍內,如堆、棧或特定的文件映射區域。 +或者,一些用戶可以知道他們工作負載的初始訪問模式,因此希望爲“自適應區域調整”設置最佳初始區域。 + +相比之下,DAMON在物理內存監測的情況下不會自動設置和更新監測目標區域。因此,用戶應該自己設置 +監測目標區域。 + +在這種情況下,用戶可以通過在 ``init_regions`` 文件中寫入適當的值,明確地設置他們想要的初 
+始監測目標區域。輸入應該是一個由三個整數組成的隊列,用空格隔開,代表一個區域的形式如下:: + + + +目標idx應該是 ``target_ids`` 文件中目標的索引,從 ``0`` 開始,區域應該按照地址順序傳遞。 +例如,下面的命令將設置幾個地址範圍, ``1-100`` 和 ``100-200`` 作爲pid 42的初始監測目標 +區域,這是 ``target_ids`` 中的第一個(索引 ``0`` ),另外幾個地址範圍, ``20-40`` 和 +``50-100`` 作爲pid 4242的地址,這是 ``target_ids`` 中的第二個(索引 ``1`` ):: + + # cd /damon + # cat target_ids + 42 4242 + # echo "0 1 100 \ + 0 100 200 \ + 1 20 40 \ + 1 50 100" > init_regions + +請注意,這只是設置了初始的監測目標區域。在虛擬內存監測的情況下,DAMON會在一個 ``更新間隔`` +後自動更新區域的邊界。因此,在這種情況下,如果用戶不希望更新的話,應該把 ``更新間隔`` 設 +置得足夠大。 + + +方案 +---- + +對於通常的基於DAMON的數據訪問感知的內存管理優化,用戶只是希望系統對特定訪問模式的內存區域應用內 +存管理操作。DAMON從用戶那裏接收這種形式化的操作方案,並將這些方案應用到目標進程中。 + +用戶可以通過讀取和寫入 ``schemes`` debugfs文件來獲得和設置這些方案。讀取該文件還可以顯示每個 +方案的統計數據。在文件中,每一個方案都應該在每一行中以下列形式表示出來:: + + + +你可以通過簡單地在文件中寫入一個空字符串來禁用方案。 + +目標訪問模式 +~~~~~~~~~~~~ + +``<目標訪問模式>`` 是由三個範圍構成的,形式如下:: + + min-size max-size min-acc max-acc min-age max-age + +具體來說,區域大小的字節數( `min-size` 和 `max-size` ),訪問頻率的每聚合區間的監測訪問次 +數( `min-acc` 和 `max-acc` ),區域年齡的聚合區間數( `min-age` 和 `max-age` )都被指定。 +請注意,這些範圍是封閉區間。 + +動作 +~~~~ + +```` 是一個預定義的內存管理動作的整數,DAMON將應用於具有目標訪問模式的區域。支持 +的數字和它們的含義如下:: + + - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED`` + - 1: Call ``madvise()`` for the region with ``MADV_COLD`` + - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT`` + - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE`` + - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE`` + - 5: Do nothing but count the statistics + +配額 +~~~~ + +每個 ``動作`` 的最佳 ``目標訪問模式`` 取決於工作負載,所以不容易找到。更糟糕的是,將某個 +動作的方案設置得過於激進會導致嚴重的開銷。爲了避免這種開銷,用戶可以通過下面表格中的 ```` +來限制方案的時間和大小配額:: + + + +這使得DAMON在 ```` 毫秒內,儘量只用 ```` 毫秒的時間對 ``目標訪 +問模式`` 的內存區域應用動作,並在 ```` 內只對最多字節的內存區域應 +用動作。將 ```` 和 ```` 都設置爲零,可以禁用配額限制。 + +當預計超過配額限制時,DAMON會根據 ``目標訪問模式`` 的大小、訪問頻率和年齡,對發現的內存 +區域進行優先排序。爲了實現個性化的優先級,用戶可以在 ``<優先級權重>`` 中設置這三個屬性的 +權重,具體形式如下:: + + + +水位 +~~~~ + +有些方案需要根據系統特定指標的當前值來運行,如自由內存比率。對於這種情況,用戶可以爲該條 +件指定水位。:: + + + +```` 是一個預定義的整數,用於要檢查的度量。支持的數字和它們的含義如下。 + + - 0: 忽視水位 + - 1: 系統空閒內存率 (千分比) + +每隔 
``<檢查間隔>`` 微秒檢查一次度量的值。 + +如果該值高於 ``<高標>`` 或低於 ``<低標>`` ,該方案被停用。如果該值低於 ``<中標>`` , +該方案將被激活。 + +統計數據 +~~~~~~~~ + +它還統計每個方案被嘗試應用的區域的總數量和字節數,每個方案被成功應用的區域的兩個數量,以 +及超過配額限制的總數量。這些統計數據可用於在線分析或調整方案。 + +統計數據可以通過讀取方案文件來顯示。讀取該文件將顯示你在每一行中輸入的每個 ``方案`` , +統計的五個數字將被加在每一行的末尾。 + +例子 +~~~~ + +下面的命令應用了一個方案:”如果一個大小爲[4KiB, 8KiB]的內存區域在[10, 20]的聚合時間 +間隔內顯示出每一個聚合時間間隔[0, 5]的訪問量,請分頁出該區域。對於分頁,每秒最多隻能使 +用10ms,而且每秒分頁不能超過1GiB。在這一限制下,首先分頁出具有較長年齡的內存區域。另外, +每5秒鐘檢查一次系統的可用內存率,當可用內存率低於50%時開始監測和分頁,但如果可用內存率 +大於60%,或低於30%,則停止監測“:: + + # cd /damon + # scheme="4096 8192 0 5 10 20 2" # target access pattern and action + # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas + # scheme+=" 0 0 100" # prioritization weights + # scheme+=" 1 5000000 600 500 300" # watermarks + # echo "$scheme" > schemes + + +開關 +---- + +除非你明確地啓動監測,否則如上所述的文件設置不會產生效果。你可以通過寫入和讀取 ``monitor_on`` +文件來啓動、停止和檢查監測的當前狀態。寫入 ``on`` 該文件可以啓動對有屬性的目標的監測。寫入 +``off`` 該文件則停止這些目標。如果每個目標進程被終止,DAMON也會停止。下面的示例命令開啓、關 +閉和檢查DAMON的狀態:: + + # cd /damon + # echo on > monitor_on + # echo off > monitor_on + # cat monitor_on + off + +請注意,當監測開啓時,你不能寫到上述的debugfs文件。如果你在DAMON運行時寫到這些文件,將會返 +回一個錯誤代碼,如 ``-EBUSY`` 。 + + +監測線程PID +----------- + +DAMON通過一個叫做kdamond的內核線程來進行請求監測。你可以通過讀取 ``kdamond_pid`` 文件獲 +得該線程的 ``pid`` 。當監測被 ``關閉`` 時,讀取該文件不會返回任何信息:: + + # cd /damon + # cat monitor_on + off + # cat kdamond_pid + none + # echo on > monitor_on + # cat kdamond_pid + 18594 + + +使用多個監測線程 +---------------- + +每個監測上下文都會創建一個 ``kdamond`` 線程。你可以使用 ``mk_contexts`` 和 ``rm_contexts`` +文件爲多個 ``kdamond`` 需要的用例創建和刪除監測上下文。 + +將新上下文的名稱寫入 ``mk_contexts`` 文件,在 ``DAMON debugfs`` 目錄上創建一個該名稱的目錄。 +該目錄將有該上下文的 ``DAMON debugfs`` 文件:: + + # cd /damon + # ls foo + # ls: cannot access 'foo': No such file or directory + # echo foo > mk_contexts + # ls foo + # attrs init_regions kdamond_pid schemes target_ids + +如果不再需要上下文,你可以通過把上下文的名字放到 ``rm_contexts`` 文件中來刪除它和相應的目錄:: + + # echo foo > rm_contexts + # ls foo + # ls: cannot access 'foo': No such file or directory + +注意, ``mk_contexts`` 、 ``rm_contexts`` 和 
``monitor_on`` 文件只在根目錄下。 + + +監測結果的監測點 +================ + +DAMON通過一個tracepoint ``damon:damon_aggregated`` 提供監測結果. 當監測開啓時,你可 +以記錄追蹤點事件,並使用追蹤點支持工具如perf顯示結果。比如說:: + + # echo on > monitor_on + # perf record -e damon:damon_aggregated & + # sleep 5 + # kill 9 $(pidof perf) + # echo off > monitor_on + # perf script + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/index.rst b/Documentation/translations/zh_TW/admin-guide/mm/index.rst new file mode 100644 index 0000000000..0b04d925b6 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/index.rst @@ -0,0 +1,50 @@ +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/index.rst + +:翻譯: + + 徐鑫 xu xin + + +======== +內存管理 +======== + +Linux內存管理子系統,顧名思義,是負責系統中的內存管理。它包括了虛擬內存與請求 +分頁的實現,內核內部結構和用戶空間程序的內存分配、將文件映射到進程地址空間以 +及許多其他很酷的事情。 + +Linux內存管理是一個具有許多可配置設置的複雜系統, 且這些設置中的大多數都可以通 +過 ``/proc`` 文件系統獲得,並且可以使用 ``sysctl`` 進行查詢和調整。這些API接 +口被描述在Documentation/admin-guide/sysctl/vm.rst文件和 `man 5 proc`_ 中。 + +.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html + +Linux內存管理有它自己的術語,如果你還不熟悉它,請考慮閱讀下面參考: +Documentation/admin-guide/mm/concepts.rst. + +在此目錄下,我們詳細描述瞭如何與Linux內存管理中的各種機制交互。 + +.. toctree:: + :maxdepth: 1 + + damon/index + ksm + +Todolist: +* concepts +* cma_debugfs +* hugetlbpage +* idle_page_tracking +* memory-hotplug +* nommu-mmap +* numa_memory_policy +* numaperf +* pagemap +* soft-dirty +* swap_numa +* transhuge +* userfaultfd +* zswap + diff --git a/Documentation/translations/zh_TW/admin-guide/mm/ksm.rst b/Documentation/translations/zh_TW/admin-guide/mm/ksm.rst new file mode 100644 index 0000000000..1b4944b3cf --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/mm/ksm.rst @@ -0,0 +1,199 @@ +.. 
include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/mm/ksm.rst + +:翻譯: + + 徐鑫 xu xin + + +============ +內核同頁合併 +============ + + +概述 +==== + +KSM是一種能節省內存的數據去重功能,由CONFIG_KSM=y啓用,並在2.6.32版本時被添 +加到Linux內核。詳見 ``mm/ksm.c`` 的實現,以及http://lwn.net/Articles/306704 +和https://lwn.net/Articles/330589 + +KSM最初目的是爲了與KVM(即著名的內核共享內存)一起使用而開發的,通過共享虛擬機 +之間的公共數據,將更多虛擬機放入物理內存。但它對於任何會生成多個相同數據實例的 +應用程序都是很有用的。 + +KSM的守護進程ksmd會定期掃描那些已註冊的用戶內存區域,查找內容相同的頁面,這些 +頁面可以被單個寫保護頁面替換(如果進程以後想要更新其內容,將自動複製)。使用: +引用:`sysfs intraface ` 接口來配置KSM守護程序在單個過程中所掃描的頁 +數以及兩個過程之間的間隔時間。 + +KSM只合並匿名(私有)頁面,從不合並頁緩存(文件)頁面。KSM的合併頁面最初只能被 +鎖定在內核內存中,但現在可以就像其他用戶頁面一樣被換出(但當它們被交換回來時共 +享會被破壞: ksmd必須重新發現它們的身份並再次合併)。 + +以madvise控制KSM +================ + +KSM僅在特定的地址空間區域時運行,即應用程序通過使用如下所示的madvise(2)系統調 +用來請求某塊地址成爲可能的合併候選者的地址空間:: + + int madvise(addr, length, MADV_MERGEABLE) + +應用程序當然也可以通過調用:: + + int madvise(addr, length, MADV_UNMERGEABLE) + +來取消該請求,並恢復爲非共享頁面:此時KSM將去除合併在該範圍內的任何合併頁。注意: +這個去除合併的調用可能突然需要的內存量超過實際可用的內存量-那麼可能會出現EAGAIN +失敗,但更可能會喚醒OOM killer。 + +如果KSM未被配置到正在運行的內核中,則madvise MADV_MERGEABLE 和 MADV_UNMERGEABLE +的調用只會以EINVAL 失敗。如果正在運行的內核是用CONFIG_KSM=y方式構建的,那麼這些 +調用通常會成功:即使KSM守護程序當前沒有運行,MADV_MERGEABLE 仍然會在KSM守護程序 +啓動時註冊範圍,即使該範圍不能包含KSM實際可以合併的任何頁面,即使MADV_UNMERGEABLE +應用於從未標記爲MADV_MERGEABLE的範圍。 + +如果一塊內存區域必須被拆分爲至少一個新的MADV_MERGEABLE區域或MADV_UNMERGEABLE區域, +當該進程將超過 ``vm.max_map_count`` 的設定,則madvise可能返回ENOMEM。(請參閱文檔 +Documentation/admin-guide/sysctl/vm.rst)。 + +與其他madvise調用一樣,它們在用戶地址空間的映射區域上使用:如果指定的範圍包含未 +映射的間隙(儘管在中間的映射區域工作),它們將報告ENOMEM,如果沒有足夠的內存用於 +內部結構,則可能會因EAGAIN而失敗。 + +KSM守護進程sysfs接口 +==================== + +KSM守護進程可以由``/sys/kernel/mm/ksm/`` 中的sysfs文件控制,所有人都可以讀取,但 +只能由root用戶寫入。各接口解釋如下: + + +pages_to_scan + ksmd進程進入睡眠前要掃描的頁數。 + 例如, ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan`` + + 默認值:100(該值被選擇用於演示目的) + +sleep_millisecs + ksmd在下次掃描前應休眠多少毫秒 + 例如, ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs`` + + 默認值:20(該值被選擇用於演示目的) + +merge_across_nodes + 指定是否可以合併來自不同NUMA節點的頁面。當設置爲0時,ksm僅合併在物理上位 + 
於同一NUMA節點的內存區域中的頁面。這降低了訪問共享頁面的延遲。在有明顯的 + NUMA距離上,具有更多節點的系統可能受益於設置該值爲0時的更低延遲。而對於 + 需要對內存使用量最小化的較小系統來說,設置該值爲1(默認設置)則可能會受 + 益於更大共享頁面。在決定使用哪種設置之前,您可能希望比較系統在每種設置下 + 的性能。 ``merge_across_nodes`` 僅當系統中沒有ksm共享頁面時,才能被更改設 + 置:首先將接口`run` 設置爲2從而對頁進行去合併,然後在修改 + ``merge_across_nodes`` 後再將‘run’又設置爲1,以根據新設置來重新合併。 + + 默認值:1(如早期的發佈版本一樣合併跨站點) + +run + * 設置爲0可停止ksmd運行,但保留合併頁面, + * 設置爲1可運行ksmd,例如, ``echo 1 > /sys/kernel/mm/ksm/run`` , + * 設置爲2可停止ksmd運行,並且對所有目前已合併的頁進行去合併,但保留可合併 + 區域以供下次運行。 + + 默認值:0(必須設置爲1才能激活KSM,除非禁用了CONFIG_SYSFS) + +use_zero_pages + 指定是否應當特殊處理空頁(即那些僅含zero的已分配頁)。當該值設置爲1時, + 空頁與內核零頁合併,而不是像通常情況下那樣空頁自身彼此合併。這可以根據 + 工作負載的不同,在具有着色零頁的架構上可以提高性能。啓用此設置時應小心, + 因爲它可能會降低某些工作負載的KSM性能,比如,當待合併的候選頁面的校驗和 + 與空頁面的校驗和恰好匹配的時候。此設置可隨時更改,僅對那些更改後再合併 + 的頁面有效。 + + 默認值:0(如同早期版本的KSM正常表現) + +max_page_sharing + 單個KSM頁面允許的最大共享站點數。這將強制執行重複數據消除限制,以避免涉 + 及遍歷共享KSM頁面的虛擬映射的虛擬內存操作的高延遲。最小值爲2,因爲新創 + 建的KSM頁面將至少有兩個共享者。該值越高,KSM合併內存的速度越快,去重 + 因子也越高,但是對於任何給定的KSM頁面,虛擬映射的最壞情況遍歷的速度也會 + 越慢。減慢了這種遍歷速度就意味着在交換、壓縮、NUMA平衡和頁面遷移期間, + 某些虛擬內存操作將有更高的延遲,從而降低這些虛擬內存操作調用者的響應能力。 + 其他任務如果不涉及執行虛擬映射遍歷的VM操作,其任務調度延遲不受此參數的影 + 響,因爲這些遍歷本身是調度友好的。 + +stable_node_chains_prune_millisecs + 指定KSM檢查特定頁面的元數據的頻率(即那些達到過時信息數據去重限制標準的 + 頁面)單位是毫秒。較小的毫秒值將以更低的延遲來釋放KSM元數據,但它們將使 + ksmd在掃描期間使用更多CPU。如果還沒有一個KSM頁面達到 ``max_page_sharing`` + 標準,那就沒有什麼用。 + +KSM與MADV_MERGEABLE的工作有效性體現於 ``/sys/kernel/mm/ksm/`` 路徑下的接口: + +pages_shared + 表示多少共享頁正在被使用 +pages_sharing + 表示還有多少站點正在共享這些共享頁,即節省了多少 +pages_unshared + 表示有多少頁是唯一的,但被反覆檢查以進行合併 +pages_volatile + 表示有多少頁因變化太快而無法放在tree中 +full_scans + 表示所有可合併區域已掃描多少次 +stable_node_chains + 達到 ``max_page_sharing`` 限制的KSM頁數 +stable_node_dups + 重複的KSM頁數 + +比值 ``pages_sharing/pages_shared`` 的最大值受限制於 ``max_page_sharing`` +的設定。要想增加該比值,則相應地要增加 ``max_page_sharing`` 的值。 + +監測KSM的收益 +============= + +KSM可以通過合併相同的頁面來節省內存,但也會消耗額外的內存,因爲它需要生成一些rmap_items +來保存每個掃描頁面的簡要rmap信息。其中有些頁面可能會被合併,但有些頁面在被檢查幾次 +後可能無法被合併,這些都是無益的內存消耗。 + +1) 如何確定KSM在全系統範圍內是節省內存還是消耗內存?這裏有一個簡單的近似計算方法供參考:: + + general_profit =~ pages_sharing * sizeof(page) - (all_rmap_items) * + 
sizeof(rmap_item); + + 其中all_rmap_items可以通過對 ``pages_sharing`` 、 ``pages_shared`` 、 ``pages_unshared`` + 和 ``pages_volatile`` 的求和而輕鬆獲得。 + +2) 單一進程中KSM的收益也可以通過以下近似的計算得到:: + + process_profit =~ ksm_merging_pages * sizeof(page) - + ksm_rmap_items * sizeof(rmap_item). + + 其中ksm_merging_pages顯示在 ``/proc//`` 目錄下,而ksm_rmap_items + 顯示在 ``/proc//ksm_stat`` 。 + +從應用的角度來看, ``ksm_rmap_items`` 和 ``ksm_merging_pages`` 的高比例意 +味着不好的madvise-applied策略,所以開發者或管理員必須重新考慮如何改變madvise策 +略。舉個例子供參考,一個頁面的大小通常是4K,而rmap_item的大小在32位CPU架構上分 +別是32B,在64位CPU架構上是64B。所以如果 ``ksm_rmap_items/ksm_merging_pages`` +的比例在64位CPU上超過64,或者在32位CPU上超過128,那麼應用程序的madvise策略應 +該被放棄,因爲ksm收益大約爲零或負值。 + +監控KSM事件 +=========== + +在/proc/vmstat中有一些計數器,可以用來監控KSM事件。KSM可能有助於節省內存,這是 +一種權衡,因爲它可能會在KSM COW或複製中的交換上遭受延遲。這些事件可以幫助用戶評估 +是否或如何使用KSM。例如,如果cow_ksm增加得太快,用戶可以減少madvise(, , MADV_MERGEABLE) +的範圍。 + +cow_ksm + 在每次KSM頁面觸發寫時拷貝(COW)時都會被遞增,當用戶試圖寫入KSM頁面時, + 我們必須做一個拷貝。 + +ksm_swpin_copy + 在換入時,每次KSM頁被複制時都會被遞增。請注意,KSM頁在換入時可能會被複 + 制,因爲do_swap_page()不能做所有的鎖,而需要重組一個跨anon_vma的KSM頁。 + +-- +Izik Eidus, +Hugh Dickins, 2009年11月17日。 + diff --git a/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst b/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst index ea51342879..fe5a5a07d5 100644 --- a/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst +++ b/Documentation/translations/zh_TW/admin-guide/reporting-issues.rst @@ -1,13 +1,6 @@ .. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) -.. - If you want to distribute this text under CC-BY-4.0 only, please use 'The - Linux kernel developers' for author attribution and link this as source: - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst -.. 
- Note: Only the content of this RST file as found in the Linux kernel sources - is available under CC-BY-4.0, as versions of this text that were processed - (for example by the kernel's build system) might contain content taken from - files which use a more restrictive license. +.. See the bottom of this file for additional redistribution information. + .. include:: ../disclaimer-zh_TW.rst @@ -26,14 +19,16 @@ 簡明指南(亦即 太長不看) ========================== -您面臨的是否爲同系列穩定版或長期支持內核的普通內核的回歸?是否仍然受支持? +您面臨的是否爲同系列穩定版或長期支持內核的普通內核的迴歸?是否仍然受支持? 請搜索 `LKML內核郵件列表 `_ 和 `Linux穩定版郵件列表 `_ 存檔中匹配的報告並 加入討論。如果找不到匹配的報告,請安裝該系列的最新版本。如果它仍然出現問題, -報告給穩定版郵件列表(stable@vger.kernel.org)。 +請報告給穩定版郵件列表(stable@vger.kernel.org)並抄送回歸郵件列表 +(regressions@lists.linux.dev);理想情況下,還可以抄送維護者和相關子系統的 +郵件列表。 在所有其他情況下,請儘可能猜測是哪個內核部分導致了問題。查看MAINTAINERS文件, -了解開發人員希望如何得知問題,大多數情況下,報告問題都是通過電子郵件和抄送 +瞭解開發人員希望如何得知問題,大多數情況下,報告問題都是通過電子郵件和抄送 相關郵件列表進行的。檢查報告目的地的存檔中是否已有匹配的報告;也請搜索 `LKML `_ 和網絡。如果找不到可加入的討論,請 安裝 `最新的主線內核 `_ 。如果仍存在問題,請發送報告。 @@ -45,21 +40,22 @@ **通用提醒** :當安裝和測試上述內核時,請確保它是普通的(即:沒有補丁,也沒 有使用附加模塊)。還要確保它是在一個正常的環境中構建和運行,並且在問題發生 -之前沒有被汙染(tainted)。 +之前沒有被污染(tainted)。 -在編寫報告時,要涵蓋與問題相關的所有信息,如使用的內核和發行版。在碰見回歸時, -嘗試給出引入它的更改的提交ID,二分可以找到它。如果您同時面臨Linux內核的多個 -問題,請分別報告每個問題。 +當你同時面臨Linux內核的多個問題時,請分別報告。在編寫報告時,要涵蓋與問題 +相關的所有信息,如使用的內核和發行版。如果碰見迴歸,請把報告抄送回歸郵件列表 +(regressions@lists.linux.dev)。也請試試用二分法找出源頭;如果成功找到,請 +在報告中寫上它的提交ID並抄送sign-off-by鏈中的所有人。 一旦報告發出,請回答任何出現的問題,並儘可能地提供幫助。這包括通過不時重新 -測試新版本並發送狀態更新來推動進展。 +測試新版本併發送狀態更新來推動進展。 如何向內核維護人員報告問題的逐步指南 ===================================== -上面的簡明指南概述了如何向Linux內核開發人員報告問題。對於已經熟悉向自由和開 -源軟體(FLOSS)項目報告問題的人來說,這可能是他們所需要的全部內容。對於其他 +上面的簡明指南概述瞭如何向Linux內核開發人員報告問題。對於已經熟悉向自由和開 +源軟件(FLOSS)項目報告問題的人來說,這可能是他們所需要的全部內容。對於其他 人,本部分更爲詳細,並一步一步地描述。爲了便於閱讀,它仍然儘量簡潔,並省略 了許多細節;這些在逐步指南後的參考章節中進行了描述,該章節更詳細地解釋了每 個步驟。 @@ -68,16 +64,16 @@ 儘早意識到看起來像Linux內核毛病的問題可能實際上是由其他原因引起的。這些步驟 可以確保你最終不會覺得在這一過程中投入的時間是浪費: - * 您是否面臨硬體或軟體供應商提供的Linux內核的問題?那麼基本上您最好停止閱讀 + * 您是否面臨硬件或軟件供應商提供的Linux內核的問題?那麼基本上您最好停止閱讀 本文檔,轉而向您的供應商報告問題,除非您願意自己安裝最新的Linux版本。尋找 和解決問題往往需要後者。 - 
* 使用您喜愛的網絡搜尋引擎對現有報告進行粗略搜索;此外,請檢查 + * 使用您喜愛的網絡搜索引擎對現有報告進行粗略搜索;此外,請檢查 `Linux內核郵件列表(LKML) `_ 的存檔。如果 找到匹配的報告,請加入討論而不是發送新報告。 - * 看看你正在處理的問題是否爲回歸問題、安全問題或非常嚴重的問題:這些都是需 - 要在接下來的一些步驟中特別處理的「高優先級問題」。 + * 看看你正在處理的問題是否爲迴歸問題、安全問題或非常嚴重的問題:這些都是需 + 要在接下來的一些步驟中特別處理的“高優先級問題”。 * 確保不是內核環境導致了您面臨的問題。 @@ -86,15 +82,15 @@ * 確保您的系統不會通過動態構建額外的內核模塊來增強其內核,像DKMS這樣的解決 方案可能在您不知情的情況下就在本地進行了這樣的工作。 - * 當問題發生時,檢查您的內核是否被「汙染」,因爲使內核設置這個標誌的事件可能 + * 當問題發生時,檢查您的內核是否被“污染”,因爲使內核設置這個標誌的事件可能 會導致您面臨的問題。 * 粗略地寫下如何重現這個問題。如果您同時處理多個問題,請爲每個問題單獨寫注 釋,並確保它們在新啓動的系統上獨立出現。這是必要的,因爲每個問題都需要分 別報告給內核開發人員,除非它們嚴重糾纏在一起。 - * 如果您正面臨穩定版或長期支持版本線的回歸(例如從5.10.4更新到5.10.5時出現 - 故障),請查看後文「報告穩定版和長期支持內核線的回歸」小節。 + * 如果您正面臨穩定版或長期支持版本線的迴歸(例如從5.10.4更新到5.10.5時出現 + 故障),請查看後文“報告穩定版和長期支持內核線的迴歸”小節。 * 定位可能引起問題的驅動程序或內核子系統。找出其開發人員期望的報告的方式和 位置。注意:大多數情況下不會是 bugzilla.kernel.org,因爲問題通常需要通 @@ -105,61 +101,62 @@ 在完成這些準備之後,你將進入主要部分: - * 除非您已經在運行最新的「主線」Linux內核,否則最好在報告流程前安裝它。在某些 - 情況下,使用最新的「穩定版」Linux進行測試和報告也是可以接受的替代方案;在 + * 除非您已經在運行最新的“主線”Linux內核,否則最好在報告流程前安裝它。在某些 + 情況下,使用最新的“穩定版”Linux進行測試和報告也是可以接受的替代方案;在 合併窗口期間,這實際上可能是最好的方法,但在開發階段最好還是暫停幾天。無論 - 你選擇什麼版本,最好使用「普通」構建。忽略這些建議會大大增加您的報告被拒絕 + 你選擇什麼版本,最好使用“普通”構建。忽略這些建議會大大增加您的報告被拒絕 或忽略的風險。 - * 確保您剛剛安裝的內核在運行時不會「汙染」自己。 + * 確保您剛剛安裝的內核在運行時不會“污染”自己。 * 在您剛剛安裝的內核中復現這個問題。如果它沒有出現,請查看下方只發生在 穩定版和長期支持內核的問題的說明。 - * 優化你的筆記:試著找到並寫出最直接的復現問題的方法。確保最終結果包含所有 + * 優化你的筆記:試着找到並寫出最直接的復現問題的方法。確保最終結果包含所有 重要的細節,同時讓第一次聽說的人容易閱讀和理解。如果您在此過程中學到了一 些東西,請考慮再次搜索關於該問題的現有報告。 - * 如果失敗涉及「panic」、「Oops」、「warning」或「BUG」,請考慮解碼內核日誌以查找觸 + * 如果失敗涉及“panic”、“Oops”、“warning”或“BUG”,請考慮解碼內核日誌以查找觸 發錯誤的代碼行。 - * 如果您的問題是回歸問題,請儘可能縮小引入問題時的範圍。 + * 如果您的問題是迴歸問題,請儘可能縮小引入問題時的範圍。 * 通過詳細描述問題來開始編寫報告。記得包括以下條目:您爲復現而安裝的最新內 核版本、使用的Linux發行版以及關於如何復現該問題的說明。如果可能,將內核 - 構建配置(.config)和 ``dmesg`` 的輸出放在網上的某個地方,並連結到它。包 + 構建配置(.config)和 ``dmesg`` 的輸出放在網上的某個地方,並鏈接到它。包 含或上傳所有其他可能相關的信息,如Oops的輸出/截圖或來自 ``lspci`` 的輸出 。一旦你寫完了這個主要部分,請在上方插入一個正常長度的段落快速概述問題和 影響。再在此之上添加一個簡單描述問題的句子,以得到人們的閱讀。現在給出一 個更短的描述性標題或主題。然後就可以像MAINTAINERS文件告訴你的那樣發送或 - 提交報告了,除非你在處理一個「高優先級問題」:它們需要按照下面「高優先級問 - 題的特殊處理」所述特別關照。 + 
提交報告了,除非你在處理一個“高優先級問題”:它們需要按照下面“高優先級問 + 題的特殊處理”所述特別關照。 * 等待別人的反應,繼續推進事情,直到你能夠接受這樣或那樣的結果。因此,請公 開和及時地回應任何詢問。測試提出的修復。積極地測試:至少重新測試每個新主 線版本的首個候選版本(RC),並報告你的結果。如果出現拖延,就友好地提醒一 - 下。如果你沒有得到任何幫助或者未能滿意,請試著自己幫助自己。 + 下。如果你沒有得到任何幫助或者未能滿意,請試着自己幫助自己。 -報告穩定版和長期支持內核線的回歸 +報告穩定版和長期支持內核線的迴歸 ---------------------------------- -如果您發現了穩定版或長期支持內核版本線中的回歸問題並按上述流程跳到這裡,那麼 +如果您發現了穩定版或長期支持內核版本線中的迴歸問題並按上述流程跳到這裏,那麼 請閱讀本小節。即例如您在從5.10.4更新到5.10.5時出現了問題(從5.9.15到5.10.5則 -不是)。開發人員希望儘快修復此類回歸,因此有一個簡化流程來報告它們: +不是)。開發人員希望儘快修復此類迴歸,因此有一個簡化流程來報告它們: * 檢查內核開發人員是否仍然維護你關心的Linux內核版本線:去 `kernel.org 的首頁 - `_ ,確保此特定版本線的最新版沒有「[EOL]」標記。 + `_ ,確保此特定版本線的最新版沒有“[EOL]”標記。 * 檢查 `Linux穩定版郵件列表 `_ 中的現有報告。 - * 從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被汙染,並且仍然 - 存在問題,因爲問題可能已經在那裡被修復了。如果您第一次發現供應商內核的問題, + * 從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被污染,並且仍然 + 存在問題,因爲問題可能已經在那裏被修復了。如果您第一次發現供應商內核的問題, 請檢查已知最新版本的普通構建是否可以正常運行。 - * 向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)。大致 - 描述問題,並解釋如何復現。講清楚首個出現問題的版本和最後一個工作正常的版本。 - 然後等待進一步的指示。 + * 向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)並抄送 + Linux迴歸郵件列表(regressions@lists.linux.dev);如果你懷疑是由某子系統 + 引起的,請抄送其維護人員和子系統郵件列表。大致描述問題,並解釋如何復現。 + 講清楚首個出現問題的版本和最後一個工作正常的版本。然後等待進一步的指示。 下面的參考章節部分詳細解釋了這些步驟中的每一步。 @@ -167,14 +164,14 @@ 報告只發生在較舊內核版本線的問題 ---------------------------------- -若您嘗試了上述的最新主線內核,但未能在那裡復現問題,那麼本小節適用於您;以下 +若您嘗試了上述的最新主線內核,但未能在那裏復現問題,那麼本小節適用於您;以下 流程有助於使問題在仍然支持的穩定版或長期支持版本線,或者定期基於最新穩定版或 長期支持內核的供應商內核中得到修復。如果是這種情況,請執行以下步驟: * 請做好準備,接下來的幾個步驟可能無法在舊版本中解決問題:修復可能太大或太 - 冒險,無法移植到那裡。 + 冒險,無法移植到那裏。 - * 執行前節「報告穩定版和長期支持內核線的回歸」中的前三個步驟。 + * 執行前節“報告穩定版和長期支持內核線的迴歸”中的前三個步驟。 * 在Linux內核版本控制系統中搜索修復主線問題的更改,因爲它的提交消息可能會 告訴你修復是否已經計劃好了支持。如果你沒有找到,搜索適當的郵件列表,尋找 @@ -219,14 +216,14 @@ 確保您使用的是上游Linux內核 ---------------------------- - *您是否面臨硬體或軟體供應商提供的Linux內核的問題?那麼基本上您最好停止閱 + *您是否面臨硬件或軟件供應商提供的Linux內核的問題?那麼基本上您最好停止閱 讀本文檔,轉而向您的供應商報告問題,除非您願意自己安裝最新的Linux版本。 尋找和解決問題往往需要後者。* -與大多數程式設計師一樣,Linux內核開發人員不喜歡花時間處理他們維護的原始碼中根本 -不會發生的問題的報告。這只會浪費每個人的時間,尤其是你的時間。不幸的是,當 +與大多數程序員一樣,Linux內核開發人員不喜歡花時間處理他們維護的源代碼中根本 +不會發生的問題的報告。這隻會浪費每個人的時間,尤其是你的時間。不幸的是,當 
涉及到內核時,這樣的情況很容易發生,並且常常導致雙方氣餒。這是因爲幾乎所有預 -裝在設備(台式機、筆記本電腦、智慧型手機、路由器等)上的Linux內核,以及大多數 +裝在設備(臺式機、筆記本電腦、智能手機、路由器等)上的Linux內核,以及大多數 由Linux發行商提供的內核,都與由kernel.org發行的官方Linux內核相距甚遠:從Linux 開發的角度來看,這些供應商提供的內核通常是古老的或者經過了大量修改,通常兩點 兼具。 @@ -235,19 +232,19 @@ 可能已經由Linux內核開發人員在數月或數年前修復;此外,供應商的修改和增強可能 會導致您面臨的問題,即使它們看起來很小或者完全不相關。這就是爲什麼您應該向 供應商報告這些內核的問題。它的開發者應該查看報告,如果它是一個上游問題,直接 -於上游修復或將報告轉發到那裡。在實踐中,這有時行不通。因此,您可能需要考慮 +於上游修復或將報告轉發到那裏。在實踐中,這有時行不通。因此,您可能需要考慮 通過自己安裝最新的Linux內核內核來繞過供應商。如果如果您選擇此方法,那麼本指 南後面的步驟將解釋如何在排除了其他可能導致您的問題的原因後執行此操作。 -注意前段使用的詞語是「大多數」,因爲有時候開發人員實際上願意處理供應商內核出現 +注意前段使用的詞語是“大多數”,因爲有時候開發人員實際上願意處理供應商內核出現 的問題報告。他們是否這麼做很大程度上取決於開發人員和相關問題。如果發行版只 根據最近的Linux版本對內核進行了較小修改,那麼機會就比較大;例如對於Debian GNU/Linux Sid或Fedora Rawhide所提供的主線內核。一些開發人員還將接受基於最新 穩定內核的發行版內核問題報告,只要它改動不大;例如Arch Linux、常規Fedora版本 和openSUSE Turboweed。但是請記住,您最好使用主線Linux,並避免在此流程中使用 -穩定版內核,如「安裝一個新的內核進行測試」一節中所詳述。 +穩定版內核,如“安裝一個新的內核進行測試”一節中所詳述。 -當然,您可以忽略所有這些建議,並向上游Linux開發人員報告舊的或經過大量修改的 +當然,您可以忽略所有這些建議,並向上遊Linux開發人員報告舊的或經過大量修改的 供應商內核的問題。但是注意,這樣的報告經常被拒絕或忽視,所以自行小心考慮一下。 不過這還是比根本不報告問題要好:有時候這樣的報告會直接或間接地幫助解決之後的 問題。 @@ -256,64 +253,61 @@ GNU/Linux Sid或Fedora Rawhide所提供的主線內核。一些開發人員還 搜索現有報告(第一部分) ------------------------- - *使用您喜愛的網絡搜尋引擎對現有報告進行粗略搜索;此外,請檢查Linux內核 + *使用您喜愛的網絡搜索引擎對現有報告進行粗略搜索;此外,請檢查Linux內核 郵件列表(LKML)的存檔。如果找到匹配的報告,請加入討論而不是發送新報告。* 報告一個別人已經提出的問題,對每個人來說都是浪費時間,尤其是作爲報告人的你。 所以徹底檢查是否有人已經報告了這個問題,這對你自己是有利的。在流程中的這一步, -可以只執行一個粗略的搜索:一旦您知道您的問題需要報告到哪裡,稍後的步驟將告訴 +可以只執行一個粗略的搜索:一旦您知道您的問題需要報告到哪裏,稍後的步驟將告訴 您如何詳細搜索。儘管如此,不要倉促完成這一步,它可以節省您的時間和減少麻煩。 -只需先用你最喜歡的搜尋引擎在網際網路上搜索。然後再搜索Linux內核郵件列表(LKML) +只需先用你最喜歡的搜索引擎在互聯網上搜索。然後再搜索Linux內核郵件列表(LKML) 存檔。 -如果搜索結果實在太多,可以考慮讓你的搜尋引擎將搜索時間範圍限制在過去的一個 -月或一年。而且無論你在哪裡搜索,一定要用恰當的搜索關鍵詞;也要變化幾次關鍵 -詞。同時,試著從別人的角度看問題:這將幫助你想出其他的關鍵詞。另外,一定不 +如果搜索結果實在太多,可以考慮讓你的搜索引擎將搜索時間範圍限制在過去的一個 +月或一年。而且無論你在哪裏搜索,一定要用恰當的搜索關鍵詞;也要變化幾次關鍵 +詞。同時,試着從別人的角度看問題:這將幫助你想出其他的關鍵詞。另外,一定不 要同時使用過多的關鍵詞。記住搜索時要同時嘗試包含和不包含內核驅動程序的名稱 -或受影響的硬體組件的名稱等信息。但其確切的品牌名稱(比如說「華碩紅魔 Radeon -RX 5700 XT Gaming OC」)往往幫助不大,因爲它太具體了。相反,嘗試搜索術語,如 -型號(Radeon 5700 或 Radeon 5000)和核心代號(「Navi」或「Navi10」),以及包含 
-和不包含其製造商(「AMD」)。 +或受影響的硬件組件的名稱等信息。但其確切的品牌名稱(比如說“華碩紅魔 Radeon +RX 5700 XT Gaming OC”)往往幫助不大,因爲它太具體了。相反,嘗試搜索術語,如 +型號(Radeon 5700 或 Radeon 5000)和核心代號(“Navi”或“Navi10”),以及包含 +和不包含其製造商(“AMD”)。 如果你發現了關於你的問題的現有報告,請加入討論,因爲你可能會提供有價值的額 外信息。這一點很重要,即使是在修復程序已經準備好或處於最後階段,因爲開發人 -員可能會尋找能夠提供額外信息或測試建議修復程序的人。跳到「發布報告後的責任」 -一節,了解有關如何正確參與的細節。 +員可能會尋找能夠提供額外信息或測試建議修復程序的人。跳到“發佈報告後的責任” +一節,瞭解有關如何正確參與的細節。 注意,搜索 `bugzilla.kernel.org `_ 網站可能 也是一個好主意,因爲這可能會提供有價值的見解或找到匹配的報告。如果您發現後者, -請記住:大多數子系統都希望在不同的位置報告,如下面「你需要將問題報告到何處」 +請記住:大多數子系統都希望在不同的位置報告,如下面“你需要將問題報告到何處” 一節中所述。因此本應處理這個問題的開發人員甚至可能不知道bugzilla的工單。所以 請檢查工單中的問題是否已經按照本文檔所述得到報告,如果沒有,請考慮這樣做。 高優先級的問題? ----------------- - *看看你正在處理的問題是否是回歸問題、安全問題或非常嚴重的問題:這些都是 - 需要在接下來的一些步驟中特別處理的「高優先級問題」。* + *看看你正在處理的問題是否是迴歸問題、安全問題或非常嚴重的問題:這些都是 + 需要在接下來的一些步驟中特別處理的“高優先級問題”。* Linus Torvalds和主要的Linux內核開發人員希望看到一些問題儘快得到解決,因此在 -報告過程中有一些「高優先級問題」的處理略有不同。有三種情況符合條件:回歸、安全 +報告過程中有一些“高優先級問題”的處理略有不同。有三種情況符合條件:迴歸、安全 問題和非常嚴重的問題。 -如果在舊版本的Linux內核中工作的東西不能在新版本的Linux內核中工作,或者某種 -程度上在新版本的Linux內核中工作得更差,那麼你就需要處理「回歸」。因此,當一個 -在Linux 5.7中表現良好的WiFi驅動程序在5.8中表現不佳或根本不能工作時,這是一 -種回歸。如果應用程式在新的內核中出現不穩定的現象,這也是一種回歸,這可能是 -由於內核和用戶空間之間的接口(如procfs和sysfs)發生不兼容的更改造成的。顯著 -的性能降低或功耗增加也可以稱爲回歸。但是請記住:新內核需要使用與舊內核相似的 -配置來構建(參見下面如何實現這一點)。這是因爲內核開發人員在實現新特性時有 -時無法避免不兼容性;但是爲了避免回歸,這些特性必須在構建配置期間顯式地啓用。 +如果某個應用程序或實際用例在原先的Linux內核上運行良好,但在使用類似配置編譯的 +較新版本上效果更差、或者根本不能用,那麼你就需要處理迴歸問題。 +Documentation/admin-guide/reporting-regressions.rst 對此進行了更詳細的解釋。 +它還提供了很多你可能想知道的關於迴歸的其他信息;例如,它解釋瞭如何將您的問題 +添加到迴歸跟蹤列表中,以確保它不會被忽略。 什麼是安全問題留給您自己判斷。在繼續之前,請考慮閱讀 -「Documentation/translations/zh_TW/admin-guide/security-bugs.rst」, -因爲它提供了如何最恰當地處理安全問題的額外細節。 +Documentation/translations/zh_CN/admin-guide/security-bugs.rst , +因爲它提供瞭如何最恰當地處理安全問題的額外細節。 -當發生了完全無法接受的糟糕事情時,此問題就是一個「非常嚴重的問題」。例如, -Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核突然顯示錯誤消息 -(「kernel panic」)並停止工作,或者根本沒有任何停止信息時,您也在處理一個嚴重 -的問題。注意:不要混淆「panic」(內核停止自身的致命錯誤)和「Oops」(可恢復錯誤), +當發生了完全無法接受的糟糕事情時,此問題就是一個“非常嚴重的問題”。例如, +Linux內核破壞了它處理的數據或損壞了它運行的硬件。當內核突然顯示錯誤消息 +(“kernel panic”)並停止工作,或者根本沒有任何停止信息時,您也在處理一個嚴重 
+的問題。注意:不要混淆“panic”(內核停止自身的致命錯誤)和“Oops”(可恢復錯誤), 因爲顯示後者之後內核仍然在運行。 @@ -325,22 +319,22 @@ Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核 看起來很像內核問題的問題有時是由構建或運行時環境引起的。很難完全排除這種問 題,但你應該儘量減少這種問題: - * 構建內核時,請使用經過驗證的工具,因爲編譯器或二進位文件中的錯誤可能會導 + * 構建內核時,請使用經過驗證的工具,因爲編譯器或二進制文件中的錯誤可能會導 致內核出現錯誤行爲。 * 確保您的計算機組件在其設計規範內運行;這對處理器、內存和主板尤爲重要。因 此,當面臨潛在的內核問題時,停止低電壓或超頻。 - * 儘量確保不是硬體故障導致了你的問題。例如,內存損壞會導致大量的問題,這些 + * 儘量確保不是硬件故障導致了你的問題。例如,內存損壞會導致大量的問題,這些 問題會表現爲看起來像內核問題。 * 如果你正在處理一個文件系統問題,你可能需要用 ``fsck`` 檢查一下文件系統, 因爲它可能會以某種方式被損壞,從而導致無法預期的內核行爲。 - * 在處理回歸問題時,要確保沒有在更新內核的同時發生了其他變化。例如,這個問 - 題可能是由同時更新的其他軟體引起的。也有可能是在你第一次重啓進入新內核時, - 某個硬體巧合地壞了。更新系統 BIOS 或改變 BIOS 設置中的某些內容也會導致 - 一些看起來很像內核回歸的問題。 + * 在處理迴歸問題時,要確保沒有在更新內核的同時發生了其他變化。例如,這個問 + 題可能是由同時更新的其他軟件引起的。也有可能是在你第一次重啓進入新內核時, + 某個硬件巧合地壞了。更新系統 BIOS 或改變 BIOS 設置中的某些內容也會導致 + 一些看起來很像內核迴歸的問題。 爲緊急情況做好準備 @@ -349,8 +343,8 @@ Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核 *創建一個全新的備份,並將系統修復和還原工具放在手邊* 我得提醒您,您正在和計算機打交道,計算機有時會出現意想不到的事情,尤其是當 -您折騰其作業系統的內核等關鍵部件時。而這就是你在這個過程中要做的事情。因此, -一定要創建一個全新的備份;還要確保你手頭有修復或重裝作業系統的所有工具, +您折騰其操作系統的內核等關鍵部件時。而這就是你在這個過程中要做的事情。因此, +一定要創建一個全新的備份;還要確保你手頭有修復或重裝操作系統的所有工具, 以及恢復備份所需的一切。 @@ -366,67 +360,67 @@ Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核 的任何模塊。然後重新啓動再繼續。 注意,你可能不知道你的系統正在使用這些解決方案之一:當你安裝 Nvidia 專有圖 -形驅動程序、VirtualBox 或其他需要 Linux 內核以外的模塊支持的軟體時,它們通 -常會靜默設置。這就是爲什麼你可能需要卸載這些軟體的軟體包,以擺脫任何第三方 +形驅動程序、VirtualBox 或其他需要 Linux 內核以外的模塊支持的軟件時,它們通 +常會靜默設置。這就是爲什麼你可能需要卸載這些軟件的軟件包,以擺脫任何第三方 內核模塊。 -檢測「汙染」標誌 +檢查“污染”標誌 ---------------- - *當問題發生時,檢查您的內核是否被「汙染」,因爲使內核設置這個標誌的事件可 + *當問題發生時,檢查您的內核是否被“污染”,因爲使內核設置這個標誌的事件可 能會導致您面臨的問題。* -當某些可能會導致看起來完全不相關的後續錯誤的事情發生時,內核會用「汙染 -(taint)」標誌標記自己。如果您的內核受到汙染,那麼您面臨的可能是這樣的錯誤。 +當某些可能會導致看起來完全不相關的後續錯誤的事情發生時,內核會用“污染 +(taint)”標誌標記自己。如果您的內核受到污染,那麼您面臨的可能是這樣的錯誤。 因此在投入更多時間到這個過程中之前,儘早排除此情況可能對你有好處。這是這個 -步驟出現在這裡的唯一原因,因爲這個過程稍後會告訴您安裝最新的主線內核;然後 -您將需要再次檢查汙染標誌,因爲當它出問題的時候內核報告會關注它。 +步驟出現在這裏的唯一原因,因爲這個過程稍後會告訴您安裝最新的主線內核;然後 +您將需要再次檢查污染標誌,因爲當它出問題的時候內核報告會關注它。 -在正在運行的系統上檢查內核是否汙染非常容易:如果 ``cat /proc/sys/kernel/tainted`` -返回「0」,那麼內核沒有被汙染,一切正常。在某些情況下無法檢查該文件;這就是 -爲什麼當內核報告內部問題(「kernel bug」)、可恢復錯誤(「kernel Oops」)或停止 
-操作前不可恢復的錯誤(「kernel panic」)時,它也會提到汙染狀態。當其中一個錯 -誤發生時,查看列印的錯誤消息的頂部,搜索以「CPU:」開頭的行。如果發現問題時內 -核未被汙染,那麼它應該以「Not infected」結束;如果你看到「Tainted:」且後跟一些 -空格和字母,那就被汙染了。 +在正在運行的系統上檢查內核是否污染非常容易:如果 ``cat /proc/sys/kernel/tainted`` +返回“0”,那麼內核沒有被污染,一切正常。在某些情況下無法檢查該文件;這就是 +爲什麼當內核報告內部問題(“kernel bug”)、可恢復錯誤(“kernel Oops”)或停止 +操作前不可恢復的錯誤(“kernel panic”)時,它也會提到污染狀態。當其中一個錯 +誤發生時,查看打印的錯誤消息的頂部,搜索以“CPU:”開頭的行。如果發現問題時內 +核未被污染,那麼它應該以“Not infected”結束;如果你看到“Tainted:”且後跟一些 +空格和字母,那就被污染了。 -如果你的內核被汙染了,請閱讀「Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst」 -以找出原因。設法消除汙染因素。通常是由以下三種因素之一引起的: +如果你的內核被污染了,請閱讀 Documentation/translations/zh_CN/admin-guide/tainted-kernels.rst +以找出原因。設法消除污染因素。通常是由以下三種因素之一引起的: - 1. 發生了一個可恢復的錯誤(「kernel Oops」),內核汙染了自己,因爲內核知道在 + 1. 發生了一個可恢復的錯誤(“kernel Oops”),內核污染了自己,因爲內核知道在 此之後它可能會出現奇怪的行爲錯亂。在這種情況下,檢查您的內核或系統日誌, 並尋找以下列文字開頭的部分:: Oops: 0000 [#1] SMP - 如方括號中的「#1」所示,這是自啓動以來的第一次Oops。每個Oops和此後發生的 + 如方括號中的“#1”所示,這是自啓動以來的第一次Oops。每個Oops和此後發生的 任何其他問題都可能是首個Oops的後續問題,即使這兩個問題看起來完全不相關。 通過消除首個Oops的原因並在之後復現該問題,可以排除這種情況。有時僅僅 重新啓動就足夠了,有時更改配置後重新啓動可以消除Oops。但是在這個流程中 不要花費太多時間在這一點上,因爲引起Oops的原因可能已經在您稍後將按流程 安裝的新Linux內核版本中修復了。 - 2. 您的系統使用的軟體安裝了自己的內核模塊,例如Nvidia的專有圖形驅動程序或 - VirtualBox。當內核從外部源(即使它們是開源的)加載此類模塊時,它會汙染 + 2. 您的系統使用的軟件安裝了自己的內核模塊,例如Nvidia的專有圖形驅動程序或 + VirtualBox。當內核從外部源(即使它們是開源的)加載此類模塊時,它會污染 自己:它們有時會在不相關的內核區域導致錯誤,從而可能導致您面臨的問題。 因此,當您想要向Linux內核開發人員報告問題時,您必須阻止這些模塊加載。 - 大多數情況下最簡單的方法是:臨時卸載這些軟體,包括它們可能已經安裝的任 + 大多數情況下最簡單的方法是:臨時卸載這些軟件,包括它們可能已經安裝的任 何模塊。之後重新啓動。 - 3. 當內核加載駐留在Linux內核原始碼staging樹中的模塊時,它也會汙染自身。這 + 3. 
當內核加載駐留在Linux內核源代碼staging樹中的模塊時,它也會污染自身。這 是一個特殊的區域,代碼(主要是驅動程序)還沒有達到正常Linux內核的質量 - 標準。當您報告此種模塊的問題時,內核受到汙染顯然是沒有問題的;只需確保 - 問題模塊是造成汙染的唯一原因。如果問題發生在一個不相關的區域,重新啓動 + 標準。當您報告此種模塊的問題時,內核受到污染顯然是沒有問題的;只需確保 + 問題模塊是造成污染的唯一原因。如果問題發生在一個不相關的區域,重新啓動 並通過指定 ``foo.blacklist=1`` 作爲內核參數臨時阻止該模塊被加載(用有 - 問題的模塊名替換「foo」)。 + 問題的模塊名替換“foo”)。 記錄如何重現問題 ------------------ *粗略地寫下如何重現這個問題。如果您同時處理多個問題,請爲每個問題單獨寫 - 注釋,並確保它們在新啓動的系統上獨立出現。這是必要的,因爲每個問題都需 + 註釋,並確保它們在新啓動的系統上獨立出現。這是必要的,因爲每個問題都需 要分別報告給內核開發人員,除非它們嚴重糾纏在一起。* 如果你同時處理多個問題,必須分別報告每個問題,因爲它們可能由不同的開發人員 @@ -438,20 +432,20 @@ Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核 注意:報告只發生過一次的問題往往是沒有結果的,因爲它們可能是由於宇宙輻射導 致的位翻轉。所以你應該嘗試通過重現問題來排除這種情況,然後再繼續。如果你有 -足夠的經驗來區分由於硬體故障引起的一次性錯誤和難以重現的罕見內核問題,可以 +足夠的經驗來區分由於硬件故障引起的一次性錯誤和難以重現的罕見內核問題,可以 忽略這個建議。 -穩定版或長期支持內核的回歸? +穩定版或長期支持內核的迴歸? ----------------------------- - *如果您正面臨穩定版或長期支持版本線的回歸(例如從5.10.4更新到5.10.5時出現 - 故障),請查看後文「報告穩定版和長期支持內核線的回歸」小節。* + *如果您正面臨穩定版或長期支持版本線的迴歸(例如從5.10.4更新到5.10.5時出現 + 故障),請查看後文“報告穩定版和長期支持內核線的迴歸”小節。* -穩定版和長期支持內核版本線中的回歸是Linux開發人員非常希望解決的問題,這樣的 -問題甚至比主線開發分支中的回歸更不應出現,因爲它們會很快影響到很多人。開發人員 -希望儘快了解此類問題,因此有一個簡化流程來報告這些問題。注意,使用更新內核版 -本線的回歸(比如從5.9.15切換到5.10.5時出現故障)不符合條件。 +穩定版和長期支持內核版本線中的迴歸是Linux開發人員非常希望解決的問題,這樣的 +問題甚至比主線開發分支中的迴歸更不應出現,因爲它們會很快影響到很多人。開發人員 +希望儘快瞭解此類問題,因此有一個簡化流程來報告這些問題。注意,使用更新內核版 +本線的迴歸(比如從5.9.15切換到5.10.5時出現故障)不符合條件。 你需要將問題報告到何處 @@ -462,9 +456,9 @@ Linux內核破壞了它處理的數據或損壞了它運行的硬體。當內核 過郵件發送給維護人員和公共郵件列表。* 將報告發送給合適的人是至關重要的,因爲Linux內核是一個大項目,大多數開發人員 -只熟悉其中的一小部分。例如,相當多的程式設計師只關心一個驅動程序,比如一個WiFi -晶片驅動程序;它的開發人員可能對疏遠的或不相關的「子系統」(如TCP堆棧、 -PCIe/PCI子系統、內存管理或文件系統)的內部知識了解很少或完全不了解。 +只熟悉其中的一小部分。例如,相當多的程序員只關心一個驅動程序,比如一個WiFi +芯片驅動程序;它的開發人員可能對疏遠的或不相關的“子系統”(如TCP堆棧、 +PCIe/PCI子系統、內存管理或文件系統)的內部知識瞭解很少或完全不瞭解。 問題在於:Linux內核缺少一個,可以簡單地將問題歸檔並讓需要了解它的開發人員了 解它的,中心化缺陷跟蹤器。這就是爲什麼你必須找到正確的途徑來自己報告問題。 @@ -476,10 +470,10 @@ PCIe/PCI子系統、內存管理或文件系統)的內部知識了解很少或 爲了說明如何使用 :ref:`MAINTAINERS ` 文件,讓我們假設您的筆記 本電腦中的WiFi在更新內核後突然出現了錯誤行爲。這種情況下可能是WiFi驅動的問 -題。顯然,它也可能由於驅動基於的某些代碼,但除非你懷疑有這樣的東西會附著在 -驅動程序上。如果真的是其他的問題,驅動程序的開發人員會讓合適的人參與進來。 +題。顯然,它也可能由於驅動基於的某些代碼,但除非你懷疑有這樣的東西會附着在 
+驅動程序上。如果真的是其他的問題,驅動程序的開發人員會讓合適的人蔘與進來。 -遺憾的是,沒有通用且簡單的辦法來檢查哪個代碼驅動了特定硬體組件。 +遺憾的是,沒有通用且簡單的辦法來檢查哪個代碼驅動了特定硬件組件。 在WiFi驅動出現問題的情況下,你可能想查看 ``lspci -k`` 的輸出,因爲它列出了 PCI/PCIe總線上的設備和驅動它的內核模塊:: @@ -492,19 +486,19 @@ PCI/PCIe總線上的設備和驅動它的內核模塊:: Kernel modules: ath10k_pci [...] -但如果你的WiFi晶片通過USB或其他內部總線連接,這種方法就行不通了。在這種情況 +但如果你的WiFi芯片通過USB或其他內部總線連接,這種方法就行不通了。在這種情況 下,您可能需要檢查您的WiFi管理器或 ``ip link`` 的輸出。尋找有問題的網絡接口 -的名稱,它可能類似於「wlp58s0」。此名稱可以用來找到驅動它的模塊:: +的名稱,它可能類似於“wlp58s0”。此名稱可以用來找到驅動它的模塊:: [user@something ~]$ realpath --relative-to=/sys/module//sys/class/net/wlp58s0/device/driver/module ath10k_pci 如果這些技巧不能進一步幫助您,請嘗試在網上搜索如何縮小相關驅動程序或子系統 -的範圍。如果你不確定是哪一個:試著猜一下,即使你猜得不好,也會有人會幫助你 +的範圍。如果你不確定是哪一個:試着猜一下,即使你猜得不好,也會有人會幫助你 的。 一旦您知道了相應的驅動程序或子系統,您就希望在MAINTAINERS文件中搜索它。如果 -是「ath10k_pci」,您不會找到任何東西,因爲名稱太具體了。有時你需要在網上尋找 +是“ath10k_pci”,您不會找到任何東西,因爲名稱太具體了。有時你需要在網上尋找 幫助;但在此之前,請嘗試使用一個稍短或修改過的名稱來搜索MAINTAINERS文件,因 爲這樣你可能會發現類似這樣的東西:: @@ -516,23 +510,23 @@ PCI/PCIe總線上的設備和驅動它的內核模塊:: SCM: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git Files: drivers/net/wireless/ath/ath10k/ -注意:如果您閱讀在Linux原始碼樹的根目錄中找到的原始維護者文件,則行描述將是 -縮寫。例如,「Mail:(郵件)」將是「M:」,「Mailing list:(郵件列表)」將是「L」, -「Status:(狀態)」將是「S:」。此文件頂部有一段解釋了這些和其他縮寫。 +注意:如果您閱讀在Linux源代碼樹的根目錄中找到的原始維護者文件,則行描述將是 +縮寫。例如,“Mail:(郵件)”將是“M:”,“Mailing list:(郵件列表)”將是“L”, +“Status:(狀態)”將是“S:”。此文件頂部有一段解釋了這些和其他縮寫。 -首先查看「Status」狀態行。理想情況下,它應該得到「Supported(支持)」或 -「Maintained(維護)」。如果狀態爲「Obsolete(過時的)」,那麼你在使用一些過時的 -方法,需要轉換到新的解決方案上。有時候,只有在感到有動力時,才會有人爲代碼 -提供「Odd Fixes」。如果碰見「Orphan」,你就完全不走運了,因爲再也沒有人關心代碼 -了,只剩下這些選項:準備好與問題共存,自己修復它,或者找一個願意修復它的程式設計師。 +首先查看“Status”狀態行。理想情況下,它應該得到“Supported(支持)”或 +“Maintained(維護)”。如果狀態爲“Obsolete(過時的)”,那麼你在使用一些過時的 +方法,需要轉換到新的解決方案上。有時候,只有在感到有動力時,纔會有人爲代碼 +提供“Odd Fixes”。如果碰見“Orphan”,你就完全不走運了,因爲再也沒有人關心代碼 +了,只剩下這些選項:準備好與問題共存,自己修復它,或者找一個願意修復它的程序員。 -檢查狀態後,尋找以「bug:」開頭的一行:它將告訴你在哪裡可以找到子系統特定的缺 +檢查狀態後,尋找以“bug:”開頭的一行:它將告訴你在哪裏可以找到子系統特定的缺 陷跟蹤器來提交你的問題。上面的例子沒有此行。大多數部分都是這樣,因爲 Linux 內核的開發完全是由郵件驅動的。很少有子系統使用缺陷跟蹤器,且其中只有一部分 依賴於 bugzilla.kernel.org。 
-在這種以及其他很多情況下,你必須尋找以「Mail:」開頭的行。這些行提到了特定代碼 -的維護者的名字和電子郵件地址。也可以查找以「Mailing list:」開頭的行,它告訴你 +在這種以及其他很多情況下,你必須尋找以“Mail:”開頭的行。這些行提到了特定代碼 +的維護者的名字和電子郵件地址。也可以查找以“Mailing list:”開頭的行,它告訴你 開發代碼的公共郵件列表。你的報告之後需要通過郵件發到這些地址。另外,對於所有 通過電子郵件發送的問題報告,一定要抄送 Linux Kernel Mailing List(LKML) 。在以後通過郵件發送問題報告時,不要遺漏任何 @@ -544,8 +538,8 @@ PCI/PCIe總線上的設備和驅動它的內核模塊:: ~~~~~~~~~~~~~~~~~~~~ 對於手頭有Linux源碼的人來說,有第二個可以找到合適的報告地點的選擇:腳本 -「scripts/get_maintainer.pl」,它嘗試找到所有要聯繫的人。它會查詢MAINTAINERS -文件,並需要用相關原始碼的路徑來調用。對於編譯成模塊的驅動程序,經常可以用 +“scripts/get_maintainer.pl”,它嘗試找到所有要聯繫的人。它會查詢MAINTAINERS +文件,並需要用相關源代碼的路徑來調用。對於編譯成模塊的驅動程序,經常可以用 這樣的命令找到:: $ modinfo ath10k_pci | grep filename | sed 's!/lib/modules/.*/kernel/!!; s!filename:!!; s!\.ko\(\|\.xz\)!!' @@ -561,13 +555,13 @@ PCI/PCIe總線上的設備和驅動它的內核模塊:: netdev@vger.kernel.org (open list:NETWORKING DRIVERS) linux-kernel@vger.kernel.org (open list) -不要把你的報告發給所有的人。發送給維護者,腳本稱之爲「supporter:」;另外抄送 +不要把你的報告發給所有的人。發送給維護者,腳本稱之爲“supporter:”;另外抄送 代碼最相關的郵件列表,以及 Linux 內核郵件列表(LKML)。在此例中,你需要將報 -告發送給 「Some Human 」 ,並抄送 -「ath10k@lists.infradead.org」和「linux-kernel@vger.kernel.org」。 +告發送給 “Some Human ” ,並抄送 +“ath10k@lists.infradead.org”和“linux-kernel@vger.kernel.org”。 -注意:如果你用 git 克隆了 Linux 原始碼,你可能需要用--git 再次調用 -get_maintainer.pl。腳本會查看提交歷史,以找到最近哪些人參與了相關代碼的編寫, +注意:如果你用 git 克隆了 Linux 源代碼,你可能需要用--git 再次調用 +get_maintainer.pl。腳本會查看提交歷史,以找到最近哪些人蔘與了相關代碼的編寫, 因爲他們可能會提供幫助。但要小心使用這些結果,因爲它很容易讓你誤入歧途。 例如,這種情況常常會發生在很少被修改的地方(比如老舊的或未維護的驅動程序): 有時這樣的代碼會在樹級清理期間被根本不關心此驅動程序的開發者修改。 @@ -580,73 +574,74 @@ get_maintainer.pl。腳本會查看提交歷史,以找到最近哪些人參與 如果找到匹配的報告,請加入討論而不是發送新報告。* 如前所述:報告一個別人已經提出的問題,對每個人來說都是浪費時間,尤其是作爲報告 -人的你。這就是爲什麼你應該再次搜索現有的報告。現在你已經知道問題需要報告到哪裡。 +人的你。這就是爲什麼你應該再次搜索現有的報告。現在你已經知道問題需要報告到哪裏。 如果是郵件列表,那麼一般在 `lore.kernel.org `_ 可以 找到相應存檔。 但有些列表運行在其他地方。例如前面步驟中當例子的ath10k WiFi驅動程序就是這種 -情況。但是你通常可以在網上很容易地找到這些列表的檔案。例如搜索「archive -ath10k@lists.infradead.org」,將引導您到ath10k郵件列表的信息頁,該頁面頂部連結 +情況。但是你通常可以在網上很容易地找到這些列表的檔案。例如搜索“archive +ath10k@lists.infradead.org”,將引導您到ath10k郵件列表的信息頁,該頁面頂部鏈接 到其 `列表存檔 `_ 。遺憾的是, 
-這個列表和其他一些列表缺乏搜索其存檔的功能。在這種情況下可以使用常規的網際網路 -搜尋引擎,並添加類似「site:lists.infadead.org/pipermail/ath10k/」這 -樣的搜索條件,這會把結果限制在該連結中的檔案。 +這個列表和其他一些列表缺乏搜索其存檔的功能。在這種情況下可以使用常規的互聯網 +搜索引擎,並添加類似“site:lists.infadead.org/pipermail/ath10k/”這 +樣的搜索條件,這會把結果限制在該鏈接中的檔案。 -也請進一步搜索網絡、LKML和bugzilla.kernel.org網站。 +也請進一步搜索網絡、LKML和bugzilla.kernel.org網站。如果你的報告需要發送到缺陷 +跟蹤器中,那麼您可能還需要檢查子系統的郵件列表存檔,因爲可能有人只在那裏報告了它。 -有關如何搜索以及在找到匹配報告時如何操作的詳細信息,請參閱上面的「搜索現有報告 -(第一部分)」。 +有關如何搜索以及在找到匹配報告時如何操作的詳細信息,請參閱上面的“搜索現有報告 +(第一部分)”。 -不要急著完成報告過程的這一步:花30到60分鐘甚至更多的時間可以爲你和其他人節省 / +不要急着完成報告過程的這一步:花30到60分鐘甚至更多的時間可以爲你和其他人節省 / 減少相當多的時間和麻煩。 安裝一個新的內核進行測試 -------------------------- - *除非您已經在運行最新的「主線」Linux內核,否則最好在報告流程前安裝它。在 - 某些情況下,使用最新的「穩定版」Linux進行測試和報告也是可以接受的替代方案; + *除非您已經在運行最新的“主線”Linux內核,否則最好在報告流程前安裝它。在 + 某些情況下,使用最新的“穩定版”Linux進行測試和報告也是可以接受的替代方案; 在合併窗口期間,這實際上可能是最好的方法,但在開發階段最好還是暫停幾天。 - 無論你選擇什麼版本,最好使用「普通」構建。忽略這些建議會大大增加您的報告 + 無論你選擇什麼版本,最好使用“普通”構建。忽略這些建議會大大增加您的報告 被拒絕或忽略的風險。* -正如第一步的詳細解釋中所提到的:與大多數程式設計師一樣,與大多數程式設計師一樣,Linux -內核開發人員不喜歡花時間處理他們維護的原始碼中根本不會發生的問題的報告。這隻 +正如第一步的詳細解釋中所提到的:與大多數程序員一樣,與大多數程序員一樣,Linux +內核開發人員不喜歡花時間處理他們維護的源代碼中根本不會發生的問題的報告。這隻 會浪費每個人的時間,尤其是你的時間。這就是爲什麼在報告問題之前,您必須先確認 問題仍然存在於最新的上游代碼中,這符合每個人的利益。您可以忽略此建議,但如前 所述:這樣做會極大地增加問題報告被拒絕或被忽略的風險。 -內核「最新上游」的範圍通常指: +內核“最新上游”的範圍通常指: * 安裝一個主線內核;最新的穩定版內核也可以是一個選擇,但大多數時候都最好避免。 - 長期支持內核(有時稱爲「LTS內核」)不適合此流程。下一小節將更詳細地解釋所有 + 長期支持內核(有時稱爲“LTS內核”)不適合此流程。下一小節將更詳細地解釋所有 這些。 * 下一小節描述獲取和安裝這樣一個內核的方法。它還指出了使用預編譯內核是可以的, - 但普通的內核更好,這意味著:它是直接使用從 `kernel.org `_ - 獲得的Linux原始碼構建並且沒有任何方式修改或增強。 + 但普通的內核更好,這意味着:它是直接使用從 `kernel.org `_ + 獲得的Linux源代碼構建並且沒有任何方式修改或增強。 選擇適合測試的版本 ~~~~~~~~~~~~~~~~~~~~ -前往 `kernel.org `_ 來決定使用哪個版本。忽略那個寫著 -「Latest release最新版本」的巨大黃色按鈕,往下看有一個表格。在表格的頂部,你會 -看到一行以「mainline」開頭的字樣,大多數情況下它會指向一個版本號類似「5.8-rc2」 -的預發布版本。如果是這樣的話,你將需要使用這個主線內核進行測試。不要讓「rc」 -嚇到你,這些「開發版內核」實際上非常可靠——而且你已經按照上面的指示做了備份, +前往 `kernel.org `_ 來決定使用哪個版本。忽略那個寫着 +“Latest release最新版本”的巨大黃色按鈕,往下看有一個表格。在表格的頂部,你會 +看到一行以“mainline”開頭的字樣,大多數情況下它會指向一個版本號類似“5.8-rc2” +的預發佈版本。如果是這樣的話,你將需要使用這個主線內核進行測試。不要讓“rc” +嚇到你,這些“開發版內核”實際上非常可靠——而且你已經按照上面的指示做了備份, 
不是嗎? -大概每九到十周,「mainline」可能會給你指出一個版本號類似「5.7」的正式版本。如果 -碰見這種情況,請考慮暫停報告過程,直到下一個版本的第一個預發布(5.8-rc1)出 -現在 `kernel.org `_ 上。這是因爲 Linux 的開發周期正在 -兩周的「合併窗口」內。大部分的改動和所有干擾性的改動都會在這段時間內被合併到 +大概每九到十週,“mainline”可能會給你指出一個版本號類似“5.7”的正式版本。如果 +碰見這種情況,請考慮暫停報告過程,直到下一個版本的第一個預發佈(5.8-rc1)出 +現在 `kernel.org `_ 上。這是因爲 Linux 的開發週期正在 +兩週的“合併窗口”內。大部分的改動和所有干擾性的改動都會在這段時間內被合併到 下一個版本中。在此期間使用主線是比較危險的。內核開發者通常也很忙,可能沒有 多餘的時間來處理問題報告。這也是很有可能在合併窗口中應用了許多修改來修復你 -所面臨的問題;這就是爲什麼你很快就得用一個新的內核版本重新測試,就像下面「發 -布報告後的責任」一節中所述的那樣。 +所面臨的問題;這就是爲什麼你很快就得用一個新的內核版本重新測試,就像下面“發 +布報告後的責任”一節中所述的那樣。 -這就是爲什麼要等到合併窗口結束後才去做。但是如果你處理的是一些不應該等待的 +這就是爲什麼要等到合併窗口結束後纔去做。但是如果你處理的是一些不應該等待的 東西,則無需這樣做。在這種情況下,可以考慮通過 git 獲取最新的主線內核(見下 文),或者使用 kernel.org 上提供的最新穩定版本。如果 mainline 因爲某些原因 不無法正常工作,那麼使用它也是可以接受的。總的來說:用它來重現問題也比完全 @@ -657,7 +652,7 @@ ath10k@lists.infradead.org」,將引導您到ath10k郵件列表的信息頁, 需要先在主線修復,然後才能得到回傳,這可能需要幾天或幾周。另一個原因是:您 希望的修復對於回傳來說可能太難或太冒險;因此再次報告問題不太可能改變任何事情。 -這些方面也部分表明了爲什麼長期支持內核(有時稱爲「LTS內核」)不適合報告流程: +這些方面也部分表明了爲什麼長期支持內核(有時稱爲“LTS內核”)不適合報告流程: 它們與當前代碼的距離太遠。因此,先去測試主線,然後再按流程走:如果主線沒有 出現問題,流程將指導您如何在舊版本線中修復它。 @@ -669,31 +664,31 @@ ath10k@lists.infradead.org」,將引導您到ath10k郵件列表的信息頁, **使用預編譯的內核** :這往往是最快速、最簡單、最安全的方法——尤其是在你不熟 悉 Linux 內核的情況下。問題是:發行商或附加存儲庫提供的大多數版本都是從修改 -過的Linux原始碼構建的。因此它們不是普通的,通常不適合於測試和問題報告:這些 +過的Linux源代碼構建的。因此它們不是普通的,通常不適合於測試和問題報告:這些 更改可能會導致您面臨的問題或以某種方式影響問題。 但是如果您使用的是流行的Linux發行版,那麼您就很幸運了:對於大部分的發行版, 您可以在網上找到包含最新主線或穩定版本Linux內核包的存儲庫。使用這些是完全可 -以的,只要從存儲庫的描述中確認它們是普通的或者至少接近普通。此外,請確保軟體 -包包含kernel.org上提供的最新版本內核。如果這些軟體包的時間超過一周,那麼它們 -可能就不合適了,因爲新的主線和穩定版內核通常至少每周發布一次。 +以的,只要從存儲庫的描述中確認它們是普通的或者至少接近普通。此外,請確保軟件 +包包含kernel.org上提供的最新版本內核。如果這些軟件包的時間超過一週,那麼它們 +可能就不合適了,因爲新的主線和穩定版內核通常至少每週發佈一次。 請注意,您以後可能需要手動構建自己的內核:有時這是調試或測試修復程序所必需的, 如後文所述。還要注意,預編譯的內核可能缺少在出現panic、Oops、warning或BUG時 -解碼內核列印的消息所需的調試符號;如果您計劃解碼這些消息,最好自己編譯內核 -(有關詳細信息,請參閱本小節結尾和「解碼失敗信息」小節)。 +解碼內核打印的消息所需的調試符號;如果您計劃解碼這些消息,最好自己編譯內核 +(有關詳細信息,請參閱本小節結尾和“解碼失敗信息”小節)。 **使用git** :熟悉 git 的開發者和有經驗的 Linux 用戶通常最好直接從 `kernel.org 上的官方開發倉庫 `_ -中獲取最新的 Linux 內核原始碼。這些很可能比最新的主線預發布版本更新一些。不 -用擔心:它們和正式的預發布版本一樣可靠,除非內核的開發周期目前正處於合併窗 
+中獲取最新的 Linux 內核源代碼。這些很可能比最新的主線預發佈版本更新一些。不 +用擔心:它們和正式的預發佈版本一樣可靠,除非內核的開發週期目前正處於合併窗 口中。不過即便如此,它們也是相當可靠的。 **常規方法** :不熟悉 git 的人通常最好從 `kernel.org `_ 下載源碼的tar 存檔包。 -如何實際構建一個內核並不在這裡描述,因爲許多網站已經解釋了必要的步驟。如果 +如何實際構建一個內核並不在這裏描述,因爲許多網站已經解釋了必要的步驟。如果 你是新手,可以考慮按照那些建議使用 ``make localmodconfig`` 來做,它將嘗試獲 取你當前內核的配置,然後根據你的系統進行一些調整。這樣做並不能使編譯出來的 內核更好,但可以更快地編譯。 @@ -702,19 +697,19 @@ ath10k@lists.infradead.org」,將引導您到ath10k郵件列表的信息頁, 啓用 CONFIG_KALLSYMS 選項。此外,還可以啓用 CONFIG_DEBUG_KERNEL 和 CONFIG_DEBUG_INFO;後者是相關選項,但只有啓用前者才能開啓。請注意, CONFIG_DEBUG_INFO 會需要更多儲存空間來構建內核。但這是值得的,因爲這些選項將 -允許您稍後精確定位觸發問題的確切代碼行。下面的「解碼失敗信息」一節對此進行了更 +允許您稍後精確定位觸發問題的確切代碼行。下面的“解碼失敗信息”一節對此進行了更 詳細的解釋。 但請記住:始終記錄遇到的問題,以防難以重現。發送未解碼的報告總比不報告要好。 -檢查「汙染」標誌 +檢查“污染”標誌 ---------------- - *確保您剛剛安裝的內核在運行時不會「汙染」自己。* + *確保您剛剛安裝的內核在運行時不會“污染”自己。* 正如上面已經詳細介紹過的:當發生一些可能會導致一些看起來完全不相關的後續錯 -誤的事情時,內核會設置一個「汙染」標誌。這就是爲什麼你需要檢查你剛剛安裝的內 +誤的事情時,內核會設置一個“污染”標誌。這就是爲什麼你需要檢查你剛剛安裝的內 核是否有設置此標誌。如果有的話,幾乎在任何情況下你都需要在報告問題之前先消 除它。詳細的操作方法請看上面的章節。 @@ -729,43 +724,43 @@ CONFIG_DEBUG_INFO 會需要更多儲存空間來構建內核。但這是值得 可以考慮使用此版本線,放棄報告問題。但是請記住,只要它沒有在 `kernel.org `_ 的穩定版和長期版(以及由這些版本衍生出來的廠商內核) 中得到修復,其他用戶可能仍然會受到它的困擾。如果你喜歡使用其中的一個,或 -者只是想幫助它們的用戶,請前往下面的「報告只發生在較舊內核版本線的問題」一節。 +者只是想幫助它們的用戶,請前往下面的“報告只發生在較舊內核版本線的問題”一節。 優化復現問題的描述 -------------------- - *優化你的筆記:試著找到並寫出最直接的復現問題的方法。確保最終結果包含所 + *優化你的筆記:試着找到並寫出最直接的復現問題的方法。確保最終結果包含所 有重要的細節,同時讓第一次聽說的人容易閱讀和理解。如果您在此過程中學到 了一些東西,請考慮再次搜索關於該問題的現有報告。* 過於複雜的報告會讓別人很難理解。因此請儘量找到一個可以直接描述、易於以書面 形式理解的再現方法。包含所有重要的細節,但同時也要儘量保持簡短。 -在這在前面的步驟中,你很可能已經了解了一些關於你所面臨的問題的點。利用這些 +在這在前面的步驟中,你很可能已經瞭解了一些關於你所面臨的問題的點。利用這些 知識,再次搜索可以轉而加入的現有報告。 解碼失敗信息 ------------- - *如果失敗涉及「panic」、「Oops」、「warning」或「BUG」,請考慮解碼內核日誌以查找 + *如果失敗涉及“panic”、“Oops”、“warning”或“BUG”,請考慮解碼內核日誌以查找 觸發錯誤的代碼行。* -當內核檢測到內部問題時,它會記錄一些有關已執行代碼的信息。這使得在原始碼中精 +當內核檢測到內部問題時,它會記錄一些有關已執行代碼的信息。這使得在源代碼中精 確定位觸發問題的行並顯示如何調用它成爲可能。但只有在配置內核時啓用了 CONFIG_DEBUG_INFO 和 CONFIG_KALLSYMS選項時,這種方法才起效。如果已啓用此選項, -請考慮解碼內核日誌中的信息。這將使我們更容易理解是什麼導致了「panic」、「Oops」、 -「warning」或「BUG」,從而增加了有人提供修復的機率。 +請考慮解碼內核日誌中的信息。這將使我們更容易理解是什麼導致了“panic”、“Oops”、 
+“warning”或“BUG”,從而增加了有人提供修復的幾率。 -解碼可以通過Linux原始碼樹中的腳本來完成。如果您運行的內核是之前自己編譯的, +解碼可以通過Linux源代碼樹中的腳本來完成。如果您運行的內核是之前自己編譯的, 這樣這樣調用它:: [user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh ./linux-5.10.5/vmlinux /usr/lib/debug/lib/modules/5.10.10-4.1.x86_64/vmlinux /usr/src/kernels/5.10.10-4.1.x86_64/ 如果您運行的是打包好的普通內核,則可能需要安裝帶有調試符號的相應包。然後按以下 -方式調用腳本(如果發行版未打包,則可能需要從Linux原始碼獲取):: +方式調用腳本(如果發行版未打包,則可能需要從Linux源代碼獲取):: [user@something ~]$ sudo dmesg | ./linux-5.10.5/scripts/decode_stacktrace.sh \ /usr/lib/debug/lib/modules/5.10.10-4.1.x86_64/vmlinux /usr/src/kernels/5.10.10-4.1.x86_64/ @@ -778,10 +773,10 @@ CONFIG_DEBUG_INFO 和 CONFIG_KALLSYMS選項時,這種方法才起效。如果 [ 68.387301] RIP: 0010:test_module_init (/home/username/linux-5.10.5/test-module/test-module.c:16) test_module -在本例中,執行的代碼是從文件「~/linux-5.10.5/test-module/test-module.c」構建的, +在本例中,執行的代碼是從文件“~/linux-5.10.5/test-module/test-module.c”構建的, 錯誤出現在第16行的指令中。 -該腳本也會如此解碼以「Call trace」開頭的部分中提到的地址,該部分顯示出現問題的 +該腳本也會如此解碼以“Call trace”開頭的部分中提到的地址,該部分顯示出現問題的 函數的路徑。此外,腳本還會顯示內核正在執行的代碼部分的彙編輸出。 注意,如果你沒法做到這一點,只需跳過這一步,並在報告中說明原因。如果你幸運的 @@ -790,60 +785,60 @@ CONFIG_DEBUG_INFO 和 CONFIG_KALLSYMS選項時,這種方法才起效。如果 別擔心,如果您碰到的情況需要這樣做,開發人員會告訴您該怎麼做。 -對回歸的特別關照 +對迴歸的特別關照 ----------------- - *如果您的問題是回歸問題,請儘可能縮小引入問題時的範圍。* + *如果您的問題是迴歸問題,請儘可能縮小引入問題時的範圍。* Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這就是爲什麼他 -認爲回歸是不可接受的,並希望看到它們被迅速修復。這就是爲什麼引入了回歸的改 -動導致的問題若無法通過其他方式快速解決,通常會被迅速撤銷。因此,報告回歸有 -點像「王炸」,會迅速得到修復。但要做到這一點,需要知道導致回歸的變化。通常情 +認爲迴歸是不可接受的,並希望看到它們被迅速修復。這就是爲什麼引入了迴歸的改 +動導致的問題若無法通過其他方式快速解決,通常會被迅速撤銷。因此,報告迴歸有 +點像“王炸”,會迅速得到修復。但要做到這一點,需要知道導致迴歸的變化。通常情 況下,要由報告者來追查罪魁禍首,因爲維護者往往沒有時間或手頭設置不便來自行 重現它。 -有一個叫做「二分」的過程可以來尋找變化,這在 -「Documentation/translations/zh_TW/admin-guide/bug-bisect.rst」文檔中進行了詳細 +有一個叫做“二分”的過程可以來尋找變化,這在 +Documentation/translations/zh_CN/admin-guide/bug-bisect.rst 文檔中進行了詳細 的描述,這個過程通常需要你構建十到二十個內核鏡像,每次都嘗試在構建下一個鏡像 -之前重現問題。是的,這需要花費一些時間,但不用擔心,它比大多數人想像的要快得多。 -多虧了「binary search二進位搜索」,這將引導你在原始碼管理系統中找到導致回歸的提交。 +之前重現問題。是的,這需要花費一些時間,但不用擔心,它比大多數人想象的要快得多。 +多虧了“binary 
search二分搜索”,這將引導你在源代碼管理系統中找到導致迴歸的提交。 一旦你找到它,就在網上搜索其主題、提交ID和縮短的提交ID(提交ID的前12個字符)。 如果有的話,這將引導您找到關於它的現有報告。 需要注意的是,二分法需要一點竅門,不是每個人都懂得訣竅,也需要相當多的努力, 不是每個人都願意投入。儘管如此,還是強烈建議自己進行一次二分。如果你真的 -不能或者不想走這條路,至少要找出是哪個主線內核引入的回歸。比如說從 5.5.15 +不能或者不想走這條路,至少要找出是哪個主線內核引入的迴歸。比如說從 5.5.15 切換到 5.8.4 的時候出現了一些問題,那麼至少可以嘗試一下相近的所有的主線版本 (5.6、5.7 和 5.8)來檢查它是什麼時候出現的。除非你想在一個穩定版或長期支持 -內核中找到一個回歸,否則要避免測試那些編號有三段的版本(5.6.12、5.7.8),因 -爲那會使結果難以解釋,可能會讓你的測試變得無用。一旦你找到了引入回歸的主要 +內核中找到一個迴歸,否則要避免測試那些編號有三段的版本(5.6.12、5.7.8),因 +爲那會使結果難以解釋,可能會讓你的測試變得無用。一旦你找到了引入迴歸的主要 版本,就可以放心地繼續報告了。但請記住:在不知道罪魁禍首的情況下,開發人員 是否能夠提供幫助取決於手頭的問題。有時他們可能會從報告中確認是什麼出現了問 題,並能修復它;有時他們可能無法提供幫助,除非你進行二分。 -當處理回歸問題時,請確保你所面臨的問題真的是由內核引起的,而不是由其他東西 +當處理迴歸問題時,請確保你所面臨的問題真的是由內核引起的,而不是由其他東西 引起的,如上文所述。 -在整個過程中,請記住:只有當舊內核和新內核的配置相似時,問題才算回歸。最好 -的方法是:把配置文件(``.config``)從舊的工作內核直接複製到你嘗試的每個新內 -核版本。之後運行 ``make oldnoconfig`` 來調整它以適應新版本的需要,而不啓用 -任何新的功能,因爲那些功能也可能導致回歸。 +在整個過程中,請記住:只有當舊內核和新內核的配置相似時,問題纔算迴歸。這可以 +通過 ``make olddefconfig`` 來實現,詳細解釋參見 +Documentation/admin-guide/reporting-regressions.rst ;它還提供了大量其他您 +可能希望瞭解的有關回歸的信息。 -撰寫並發送報告 +撰寫併發送報告 --------------- *通過詳細描述問題來開始編寫報告。記得包括以下條目:您爲復現而安裝的最新 內核版本、使用的Linux發行版以及關於如何復現該問題的說明。如果可能,將內 - 核構建配置(.config)和 ``dmesg`` 的輸出放在網上的某個地方,並連結到它。 + 核構建配置(.config)和 ``dmesg`` 的輸出放在網上的某個地方,並鏈接到它。 包含或上傳所有其他可能相關的信息,如Oops的輸出/截圖或來自 ``lspci`` 的輸出。一旦你寫完了這個主要部分,請在上方插入一個正常長度的段落快速概 述問題和影響。再在此之上添加一個簡單描述問題的句子,以得到人們的閱讀。 現在給出一個更短的描述性標題或主題。然後就可以像MAINTAINERS文件告訴你的 - 那樣發送或提交報告了,除非你在處理一個「高優先級問題」:它們需要按照下面 - 「高優先級問題的特殊處理」所述特別關照。* + 那樣發送或提交報告了,除非你在處理一個“高優先級問題”:它們需要按照下面 + “高優先級問題的特殊處理”所述特別關照。* -現在你已經準備好了一切,是時候寫你的報告了。上文前言中連結的三篇文檔對如何 +現在你已經準備好了一切,是時候寫你的報告了。上文前言中鏈接的三篇文檔對如何 寫報告做了部分解釋。這就是爲什麼本文將只提到一些基本的內容以及 Linux 內核特 有的東西。 @@ -855,7 +850,7 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 每份報告都應提及的事項 ~~~~~~~~~~~~~~~~~~~~~~~~ -詳細描述你的問題是如何發生在你安裝的新純淨內核上的。試著包含你之前寫的和優 +詳細描述你的問題是如何發生在你安裝的新純淨內核上的。試着包含你之前寫的和優 化過的分步說明,概述你和其他人如何重現這個問題;在極少數無法重現的情況下, 儘量描述你做了什麼來觸發它。 @@ -864,19 +859,19 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 * ``cat /proc/version`` 的輸出,其中包含 Linux 
內核版本號和構建時的編譯器。 - 機器正在運行的 Linux 發行版( ``hostnamectl | grep 「Operating System「`` ) + 機器正在運行的 Linux 發行版( ``hostnamectl | grep "Operating System"`` ) - * CPU 和作業系統的架構( ``uname -mi`` ) + * CPU 和操作系統的架構( ``uname -mi`` ) - * 如果您正在處理回歸,並進行了二分,請提及導致回歸的變更的主題和提交ID。 + * 如果您正在處理迴歸,並進行了二分,請提及導致迴歸的變更的主題和提交ID。 -許多情況下,讓讀你報告的人多了解兩件事也是明智之舉: +許多情況下,讓讀你報告的人多瞭解兩件事也是明智之舉: - * 用於構建 Linux 內核的配置(「.config」文件) + * 用於構建 Linux 內核的配置(“.config”文件) - * 內核的信息,你從 ``dmesg`` 得到的信息寫到一個文件里。確保它以像「Linux + * 內核的信息,你從 ``dmesg`` 得到的信息寫到一個文件裏。確保它以像“Linux version 5.8-1 (foobar@example.com) (gcc (GCC) 10.2.1, GNU ld version - 2.34) #1 SMP Mon Aug 3 14:54:37 UTC 2020」這樣的行開始,如果沒有,那麼第 + 2.34) #1 SMP Mon Aug 3 14:54:37 UTC 2020”這樣的行開始,如果沒有,那麼第 一次啓動階段的重要信息已經被丟棄了。在這種情況下,可以考慮使用 ``journalctl -b 0 -k`` ;或者你也可以重啓,重現這個問題,然後調用 ``dmesg`` 。 @@ -887,39 +882,39 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 * 將文件上傳到某個公開的地方(你的網站,公共文件粘貼服務,在 `bugzilla.kernel.org `_ 上創建的工單……), - 並在你的報告中放上連結。理想情況下請使用允許這些文件保存很多年的地方,因 + 並在你的報告中放上鏈接。理想情況下請使用允許這些文件保存很多年的地方,因 爲它們可能在很多年後對別人有用;例如 5 年或 10 年後,一個開發者正在修改 一些代碼,而這些代碼正是爲了修復你的問題。 - * 把文件放在一邊,然後說明你會在他人回復時再單獨發送。只要記得報告發出去後, + * 把文件放在一邊,然後說明你會在他人回覆時再單獨發送。只要記得報告發出去後, 真正做到這一點就可以了。;-) 提供這些東西可能是明智的 ~~~~~~~~~~~~~~~~~~~~~~~~~~ -根據問題的不同,你可能需要提供更多的背景數據。這裡有一些關於提供什麼比較好 +根據問題的不同,你可能需要提供更多的背景數據。這裏有一些關於提供什麼比較好 的建議: - * 如果你處理的是內核的「warning」、「OOPS」或「panic」,請包含它。如果你不能複製 - 粘貼它,試著用netconsole網絡終端遠程跟蹤或者至少拍一張屏幕的照片。 + * 如果你處理的是內核的“warning”、“OOPS”或“panic”,請包含它。如果你不能複製 + 粘貼它,試着用netconsole網絡終端遠程跟蹤或者至少拍一張屏幕的照片。 - * 如果問題可能與你的電腦硬體有關,請說明你使用的是什麼系統。例如,如果你的 - 顯卡有問題,請提及它的製造商,顯卡的型號,以及使用的晶片。如果是筆記本電 - 腦,請提及它的型號名稱,但儘量確保意義明確。例如「戴爾 XPS 13」就不很明確, + * 如果問題可能與你的電腦硬件有關,請說明你使用的是什麼系統。例如,如果你的 + 顯卡有問題,請提及它的製造商,顯卡的型號,以及使用的芯片。如果是筆記本電 + 腦,請提及它的型號名稱,但儘量確保意義明確。例如“戴爾 XPS 13”就不很明確, 因爲它可能是 2012 年的那款,那款除了看起來和現在銷售的沒有什麼不同之外, 兩者沒有任何共同之處。因此,在這種情況下,要加上準確的型號,例如 2019 - 年內推出的 XPS 13 型號爲「9380」或「7390」。像「聯想 Thinkpad T590」這樣的名字 + 年內推出的 XPS 13 型號爲“9380”或“7390”。像“聯想 Thinkpad T590”這樣的名字 也有些含糊不清:這款筆記本有帶獨立顯卡和不帶的子型號,所以要儘量找到準確 的型號名稱或註明主要部件。 - *
說明正在使用的相關軟體。如果你在加載模塊時遇到了問題,你要說明正在使用的 + * 說明正在使用的相關軟件。如果你在加載模塊時遇到了問題,你要說明正在使用的 kmod、systemd 和 udev 的版本。如果其中一個 DRM 驅動出現問題,你要說明 libdrm 和 Mesa 的版本;還要說明你的 Wayland 合成器或 X-Server 及其驅動。 如果你有文件系統問題,請註明相應的文件系統實用程序的版本(e2fsprogs, btrfs-progs, xfsprogs……)。 * 從內核中收集可能有用的額外信息。例如, ``lspci -nn`` 的輸出可以幫助別人 - 識別你使用的硬體。如果你的硬體有問題,你甚至可以給出 ``sudo lspci -vvv`` + 識別你使用的硬件。如果你的硬件有問題,你甚至可以給出 ``sudo lspci -vvv`` 的結果,因爲它提供了組件是如何配置的信息。對於一些問題,可能最好包含 ``/proc/cpuinfo`` , ``/proc/ioports`` , ``/proc/iomem`` , ``/proc/modules`` 或 ``/proc/scsi/scsi`` 等文件的內容。一些子系統還提 @@ -936,7 +931,7 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 ~~~~~~~~~~~~~~~~~~~~~~ 現在你已經準備好了報告的詳細部分,讓我們進入最重要的部分:開頭幾句。現在到 -報告的最前面,在你剛才寫的部分之前加上類似「The detailed description:」(詳細 +報告的最前面,在你剛纔寫的部分之前加上類似“The detailed description:”(詳細 描述)這樣的內容,並在最前面插入兩個新行。現在寫一個正常長度的段落,大致概 述這個問題。去掉所有枯燥的細節,把重點放在讀者需要知道的關鍵部分,以讓人了 解這是怎麼回事;如果你認爲這個缺陷影響了很多用戶,就提一下這點來吸引大家關 @@ -946,10 +941,10 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 要更加抽象,爲報告寫一個更短的主題/標題。 現在你已經寫好了這部分,請花點時間來優化它,因爲它是你的報告中最重要的部分: -很多人會先讀這部分,然後才會決定是否值得花時間閱讀其他部分。 +很多人會先讀這部分,然後纔會決定是否值得花時間閱讀其他部分。 現在就像 :ref:`MAINTAINERS ` 維護者文件告訴你的那樣發送或提交 -報告,除非它是前面概述的那些「高優先級問題」之一:在這種情況下,請先閱讀下一 +報告,除非它是前面概述的那些“高優先級問題”之一:在這種情況下,請先閱讀下一 小節,然後再發送報告。 高優先級問題的特殊處理 @@ -960,11 +955,19 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 **非常嚴重的缺陷** :確保在主題或工單標題以及第一段中明顯標出 severeness (非常嚴重的)。 -**回歸** :如果問題是一個回歸,請在郵件的主題或缺陷跟蹤器的標題中添加 -[REGRESSION]。如果您沒有進行二分,請至少註明您測試的最新主線版本(比如 5.7) -和出現問題的最新版本(比如 5.8)。如果您成功地進行了二分,請註明導致回歸 -的提交ID和主題。也請添加該變更的作者到你的報告中;如果您需要將您的缺陷提交 -到缺陷跟蹤器中,請將報告以私人郵件的形式轉發給他,並註明報告提交地點。 +**迴歸** :報告的主題應以“[REGRESSION]”開頭。 + +如果您成功用二分法定位了問題,請使用引入迴歸之更改的標題作爲主題的第二部分。 +請在報告中寫明“罪魁禍首”的提交ID。如果未能成功二分,請在報告中講明最後一個 +正常工作的版本(例如5.7)和最先發生問題的版本(例如5.8-rc1)。 + +通過郵件發送報告時,請抄送Linux迴歸郵件列表(regressions@lists.linux.dev)。 +如果報告需要提交到某個web追蹤器,請繼續提交;並在提交後,通過郵件將報告轉發 +至迴歸列表;抄送相關子系統的維護人員和郵件列表。請確保報告是內聯轉發的,不要 +把它作爲附件。另外請在頂部添加一個簡短的說明,在那裏寫上工單的網址。 + +在郵寄或轉發報告時,如果成功二分,需要將“罪魁禍首”的作者添加到收件人中;同時 +抄送signed-off-by鏈中的每個人,您可以在提交消息的末尾找到。 **安全問題** 
:對於這種問題,你將必須評估:如果細節被公開披露,是否會對其他 用戶產生短期風險。如果不會,只需按照所述繼續報告問題。如果有此風險,你需要 @@ -972,47 +975,47 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 * 如果 MAINTAINERS 文件指示您通過郵件報告問題,請不要抄送任何公共郵件列表。 - * 如果你應該在缺陷跟蹤器中提交問題,請確保將工單標記爲「私有」或「安全問題」。 + * 如果你應該在缺陷跟蹤器中提交問題,請確保將工單標記爲“私有”或“安全問題”。 如果缺陷跟蹤器沒有提供保持報告私密性的方法,那就別想了,把你的報告以私人 郵件的形式發送給維護者吧。 -在這兩種情況下,都一定要將報告發到 MAINTAINERS 文件中「安全聯絡」部分列出的 +在這兩種情況下,都一定要將報告發到 MAINTAINERS 文件中“安全聯絡”部分列出的 地址。理想的情況是在發送報告的時候直接抄送他們。如果您在缺陷跟蹤器中提交了 -報告,請將報告的文本轉發到這些地址;但請在報告的頂部加上注釋,表明您提交了 -報告,並附上工單連結。 +報告,請將報告的文本轉發到這些地址;但請在報告的頂部加上註釋,表明您提交了 +報告,並附上工單鏈接。 -更多信息請參見「Documentation/translations/zh_TW/admin-guide/security-bugs.rst」。 +更多信息請參見 Documentation/translations/zh_CN/admin-guide/security-bugs.rst 。 -發布報告後的責任 +發佈報告後的責任 ------------------ *等待別人的反應,繼續推進事情,直到你能夠接受這樣或那樣的結果。因此,請 公開和及時地回應任何詢問。測試提出的修復。積極地測試:至少重新測試每個 新主線版本的首個候選版本(RC),並報告你的結果。如果出現拖延,就友好地 - 提醒一下。如果你沒有得到任何幫助或者未能滿意,請試著自己幫助自己。* + 提醒一下。如果你沒有得到任何幫助或者未能滿意,請試着自己幫助自己。* 如果你的報告非常優秀,而且你真的很幸運,那麼某個開發者可能會立即發現導致問 題的原因;然後他們可能會寫一個補丁來修復、測試它,並直接發送給主線集成,同 -時標記它以便以後回溯到需要它的穩定版和長期支持內核。那麼你需要做的就是回復 -一句「Thank you very much」(非常感謝),然後在發布後換上修復好的版本。 +時標記它以便以後回溯到需要它的穩定版和長期支持內核。那麼你需要做的就是回覆 +一句“Thank you very much”(非常感謝),然後在發佈後換上修復好的版本。 -但這種理想狀況很少發生。這就是爲什麼你把報告拿出來之後工作才開始。你要做的 -事情要視情況而定,但通常會是下面列出的事情。但在深入研究細節之前,這裡有幾 +但這種理想狀況很少發生。這就是爲什麼你把報告拿出來之後工作纔開始。你要做的 +事情要視情況而定,但通常會是下面列出的事情。但在深入研究細節之前,這裏有幾 件重要的事情,你需要記住這部分的過程。 關於進一步互動的一般建議 ~~~~~~~~~~~~~~~~~~~~~~~~~~ -**總是公開回復** :當你在缺陷跟蹤器中提交問題時,一定要在那裡回復,不要私下 -聯繫任何開發者。對於郵件報告,在回復您收到的任何郵件時,總是使用「全部回復」 +**總是公開回復** :當你在缺陷跟蹤器中提交問題時,一定要在那裏回覆,不要私下 +聯繫任何開發者。對於郵件報告,在回覆您收到的任何郵件時,總是使用“全部回覆” 功能。這包括帶有任何你可能想要添加到你的報告中的額外數據的郵件:進入郵件應 -用程序「已發送」文件夾,並在郵件上使用「全部回復」來回復報告。這種方法可以確保 -公共郵件列表和其他所有參與者都能及時了解情況;它還能保持郵件線程的完整性, +用程序“已發送”文件夾,並在郵件上使用“全部回覆”來回復報告。這種方法可以確保 +公共郵件列表和其他所有參與者都能及時瞭解情況;它還能保持郵件線程的完整性, 這對於郵件列表將所有相關郵件歸爲一類是非常重要的。 -只有兩種情況不適合在缺陷跟蹤器或「全部回復」中發表評論: +只有兩種情況不適合在缺陷跟蹤器或“全部回覆”中發表評論: * 有人讓你私下發東西。 @@ -1022,32 +1025,32 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 **在請求解釋或幫助之前先研究一下** :在這部分過程中,有人可能會告訴你用尚未 
掌握的技能做一些事情。例如你可能會被要求使用一些你從未聽說過的測試工具;或 -者你可能會被要求在 Linux 內核原始碼上應用一個補丁來測試它是否有幫助。在某些 -情況下,發個回復詢問如何做就可以了。但在走這條路之前,儘量通過在網際網路上搜 +者你可能會被要求在 Linux 內核源代碼上應用一個補丁來測試它是否有幫助。在某些 +情況下,發個回覆詢問如何做就可以了。但在走這條路之前,儘量通過在互聯網上搜 索自行找到答案;或者考慮在其他地方詢問建議。比如詢問朋友,或者到你平時常去 的聊天室或論壇發帖諮詢。 **要有耐心** :如果你真的很幸運,你可能會在幾個小時內收到對你的報告的答覆。 但大多數情況下會花費更多的時間,因爲維護者分散在全球各地,因此可能在不同的 -時區——在那裡他們已經享受著遠離鍵盤的夜晚。 +時區——在那裏他們已經享受着遠離鍵盤的夜晚。 一般來說,內核開發者需要一到五個工作日來回復報告。有時會花費更長的時間,因 爲他們可能正忙於合併窗口、其他工作、參加開發者會議,或者只是在享受一個漫長 的暑假。 -「高優先級的問題」(見上面的解釋)例外:維護者應該儘快解決這些問題;這就是爲 +“高優先級的問題”(見上面的解釋)例外:維護者應該儘快解決這些問題;這就是爲 什麼你應該最多等待一個星期(如果是緊急的事情,則只需兩天),然後再發送友好 的提醒。 -有時維護者可能沒有及時回復;有時候可能會出現分歧,例如一個問題是否符合回歸 +有時維護者可能沒有及時回覆;有時候可能會出現分歧,例如一個問題是否符合迴歸 的條件。在這種情況下,在郵件列表上提出你的顧慮,並請求其他人公開或私下回復 如何繼續推進。如果失敗了,可能應該讓更高級別的維護者介入。如果是 WiFi 驅動, 那就是無線維護者;如果沒有更高級別的維護者,或者其他一切努力都失敗了,那 這可能是一種罕見的、可以讓 Linus Torvalds 參與進來的情況。 -**主動測試** :每當一個新的主線內核版本的第一個預發布版本(rc1)發布的時候, +**主動測試** :每當一個新的主線內核版本的第一個預發佈版本(rc1)發佈的時候, 去檢查一下這個問題是否得到了解決,或者是否有什麼重要的變化。在工單中或在 -回復報告的郵件中提及結果(確保所有參與討論的人都被抄送)。這將表明你的承諾 +回覆報告的郵件中提及結果(確保所有參與討論的人都被抄送)。這將表明你的承諾 和你願意幫忙。如果問題持續存在,它也會提醒開發者確保他們不會忘記它。其他一 些不定期的重新測試(例如用rc3、rc5 和最終版本)也是一個好主意,但只有在相關 的東西發生變化或者你正在寫什麼東西的時候才報告你的結果。 @@ -1057,10 +1060,10 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 查詢和測試請求 ~~~~~~~~~~~~~~~ -如果你的報告得到了回復則需履行以下責任: +如果你的報告得到了回覆則需履行以下責任: **檢查與你打交道的人** :大多數情況下,會是維護者或特定代碼區域的開發人員對 -你的報告做出回應。但由於問題通常是公開報告的,所以回復的可能是任何人——包括 +你的報告做出回應。但由於問題通常是公開報告的,所以回覆的可能是任何人——包括 那些想要幫忙的人,但最後可能會用他們的問題或請求引導你完全偏離軌道。這很少 發生,但這是快速上網搜搜看你正在與誰互動是明智之舉的許多原因之一。通過這樣 做,你也可以知道你的報告是否被正確的人聽到,因爲如果討論沒有導致滿意的問題 @@ -1086,63 +1089,63 @@ Linux 首席開發者 Linus Torvalds 認爲 Linux 內核永遠不應惡化,這 報告到達時,維護者剛剛離開鍵盤一段時間,或者有更重要的事情要處理。在寫提醒 信的時候,要善意地問一下,是否還需要你這邊提供什麼來讓事情推進下去。如果報 告是通過郵件發出來的,那就在郵件的第一行回覆你的初始郵件(見上文),其中包 -括下方的原始報告的完整引用:這是少數幾種情況下,這樣的「TOFU」(Text Over, +括下方的原始報告的完整引用:這是少數幾種情況下,這樣的“TOFU”(Text Over, Fullquote Under文字在上,完整引用在下)是正確的做法,因爲這樣所有的收件人都 會以適當的順序立即讓細節到手頭上來。 -在提醒之後,再等三周的回覆。如果你仍然沒有得到適當的反饋,你首先應該重新考 +在提醒之後,再等三週的回覆。如果你仍然沒有得到適當的反饋,你首先應該重新考 慮你的方法。你是否可能嘗試接觸了錯誤的人?是不是報告也許令人反感或者太混亂, 以至於人們決定完全遠離它?排除這些因素的最好方法是:把報告給一兩個熟悉 
FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於如何繼續推進的建議。 -這可能意味著:準備一份更好的報告,讓這些人在你發出去之前對它進行審查。這樣 +這可能意味着:準備一份更好的報告,讓這些人在你發出去之前對它進行審查。這樣 的方法完全可以;只需說明這是關於這個問題的第二份改進的報告,並附上第一份報 -告的連結。 +告的鏈接。 如果報告是恰當的,你可以發送第二封提醒信;在其中詢問爲什麼報告沒有得到任何 -回復。第二封提醒郵件的好時機是在新 Linux 內核版本的首個預發布版本('rc1') -發布後不久,因爲無論如何你都應該在那個時候重新測試並提供狀態更新(見上文)。 +回覆。第二封提醒郵件的好時機是在新 Linux 內核版本的首個預發佈版本('rc1') +發佈後不久,因爲無論如何你都應該在那個時候重新測試並提供狀態更新(見上文)。 -如果第二次提醒的結果又在一周內沒有任何反應,可以嘗試聯繫上級維護者詢問意見: +如果第二次提醒的結果又在一週內沒有任何反應,可以嘗試聯繫上級維護者詢問意見: 即使再忙的維護者在這時候也至少應該發過某種確認。 記住要做好失望的準備:理想狀況下維護者最好對每一個問題報告做出回應,但他們 -只有義務解決之前列出的「高優先級問題」。所以,如果你得到的回覆是「謝謝你的報告, -我目前有更重要的問題要處理,在可預見的未來沒有時間去研究這個問題」,那請不 +只有義務解決之前列出的“高優先級問題”。所以,如果你得到的回覆是“謝謝你的報告, +我目前有更重要的問題要處理,在可預見的未來沒有時間去研究這個問題”,那請不 要太沮喪。 也有可能在缺陷跟蹤器或列表中進行了一些討論之後,什麼都沒有發生,提醒也無助 於激勵大家進行修復。這種情況可能是毀滅性的,但在 Linux 內核開發中確實會發生。 -這些和其他得不到幫助的原因在本文結尾處的「爲什麼有些問題在被報告後沒有得到 -任何回應或者仍然沒有修復」中進行了解釋。 +這些和其他得不到幫助的原因在本文結尾處的“爲什麼有些問題在被報告後沒有得到 +任何回應或者仍然沒有修復”中進行了解釋。 如果你沒有得到任何幫助或問題最終沒有得到解決,不要沮喪:Linux 內核是 FLOSS, -因此你仍然可以自己幫助自己。例如,你可以試著找到其他受影響的人,和他們一 +因此你仍然可以自己幫助自己。例如,你可以試着找到其他受影響的人,和他們一 起合作來解決這個問題。這樣的團隊可以一起準備一份新的報告,提到團隊有多少人, 爲什麼你們認爲這是應該得到解決的事情。也許你們還可以一起縮小確切原因或引 -入回歸的變化,這往往會使修復更容易。而且如果運氣好的話,團隊中可能會有懂點 -編程的人,也許能寫出一個修複方案。 +入迴歸的變化,這往往會使修復更容易。而且如果運氣好的話,團隊中可能會有懂點 +編程的人,也許能寫出一個修復方案。 -「報告穩定版和長期支持內核線的回歸」的參考 +“報告穩定版和長期支持內核線的迴歸”的參考 ------------------------------------------ -本小節提供了在穩定版和長期支持內核線中面對回歸時需要執行的步驟的詳細信息。 +本小節提供了在穩定版和長期支持內核線中面對迴歸時需要執行的步驟的詳細信息。 確保特定版本線仍然受支持 ~~~~~~~~~~~~~~~~~~~~~~~~~ *檢查內核開發人員是否仍然維護你關心的Linux內核版本線:去 kernel.org 的 - 首頁,確保此特定版本線的最新版沒有「[EOL]」標記。* + 首頁,確保此特定版本線的最新版沒有“[EOL]”標記。* 大多數內核版本線只支持三個月左右,因爲延長維護時間會帶來相當多的工作。因此, 每年只會選擇一個版本來支持至少兩年(通常是六年)。這就是爲什麼你需要檢查 內核開發者是否還支持你關心的版本線。 -注意,如果 `kernel.org `_ 在首頁上列出了兩個「穩定」版本, +注意,如果 `kernel.org `_ 在首頁上列出了兩個“穩定”版本, 你應該考慮切換到較新的版本,而忘掉較舊的版本:對它的支持可能很快就會結束。 -然後,它將被標記爲「生命周期結束」(EOL)。達到這個程度的版本線仍然會在 -`kernel.org `_ 首頁上被顯示一兩周,但不適合用於測試和 +然後,它將被標記爲“生命週期結束”(EOL)。達到這個程度的版本線仍然會在 +`kernel.org `_ 首頁上被顯示一兩週,但不適合用於測試和 報告。 搜索穩定版郵件列表 @@ -1158,57 +1161,63 @@ FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於 用最新版本復現問題 ~~~~~~~~~~~~~~~~~~~ - 
*從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被汙染,並且仍 - 然存在問題,因爲問題可能已經在那裡被修復了。* + *從特定的版本線安裝最新版本作爲純淨內核。確保這個內核沒有被污染,並且仍 + 然存在問題,因爲問題可能已經在那裏被修復了。* 在投入更多時間到這個過程中之前,你要檢查這個問題是否在你關注的版本線的最新 -版本中已經得到了修復。這個內核需要是純淨的,在問題發生之前不應該被汙染,正 +版本中已經得到了修復。這個內核需要是純淨的,在問題發生之前不應該被污染,正 如上面已經在測試主線的過程中詳細介紹過的一樣。 -您是否是第一次注意到供應商內核的回歸?供應商的更改可能會發生變化。你需要重新 +您是否是第一次注意到供應商內核的迴歸?供應商的更改可能會發生變化。你需要重新 檢查排除來這個問題。當您從5.10.4-vendor.42更新到5.10.5-vendor.43時,記錄損壞 的信息。然後在測試了前一段中所述的最新5.10版本之後,檢查Linux 5.10.4的普通版本 -是否也可以正常工作。如果問題在那裡出現,那就不符合上游回歸的條件,您需要切換 +是否也可以正常工作。如果問題在那裏出現,那就不符合上游迴歸的條件,您需要切換 回主逐步指南來報告問題。 -報告回歸 +報告迴歸 ~~~~~~~~~~ - *向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)。 - 大致描述問題,並解釋如何復現。講清楚首個出現問題的版本和最後一個工作正常 - 的版本。然後等待進一步的指示。* + *向Linux穩定版郵件列表發送一個簡短的問題報告(stable@vger.kernel.org)並 + 抄送Linux迴歸郵件列表(regressions@lists.linux.dev);如果你懷疑是由某 + 子系統引起的,請抄送其維護人員和子系統郵件列表。大致描述問題,並解釋如 + 何復現。講清楚首個出現問題的版本和最後一個工作正常的版本。然後等待進一 + 步的指示。* -當報告在穩定版或長期支持內核線內發生的回歸(例如在從5.10.4更新到5.10.5時), -一份簡短的報告足以快速報告問題。因此只需要粗略的描述。 +當報告在穩定版或長期支持內核線內發生的迴歸(例如在從5.10.4更新到5.10.5時), +一份簡短的報告足以快速報告問題。因此只需向穩定版和迴歸郵件列表發送粗略的描述; +不過如果你懷疑某子系統導致此問題的話,請一併抄送其維護人員和子系統郵件列表, +這會加快進程。 -但是請注意,如果您能夠指明引入問題的確切版本,這將對開發人員有很大幫助。因此 -如果有時間的話,請嘗試使用普通內核找到該版本。讓我們假設發行版發布Linux內核 +請注意,如果您能夠指明引入問題的確切版本,這將對開發人員有很大幫助。因此 +如果有時間的話,請嘗試使用普通內核找到該版本。讓我們假設發行版發佈Linux內核 5.10.5到5.10.8的更新時發生了故障。那麼按照上面的指示,去檢查該版本線中的最新 內核,比如5.10.9。如果問題出現,請嘗試普通5.10.5,以確保供應商應用的補丁不會 干擾。如果問題沒有出現,那麼嘗試5.10.7,然後直到5.10.8或5.10.6(取決於結果) 找到第一個引入問題的版本。在報告中寫明這一點,並指出5.10.9仍然存在故障。 -前一段基本粗略地概述了「二分」方法。一旦報告出來,您可能會被要求做一個正確的 +前一段基本粗略地概述了“二分”方法。一旦報告出來,您可能會被要求做一個正確的 報告,因爲它允許精確地定位導致問題的確切更改(然後很容易被恢復以快速修復問題)。 -因此如果時間允許,考慮立即進行適當的二分。有關如何詳細信息,請參閱「對回歸的 -特別關照」部分和文檔「Documentation/translations/zh_TW/admin-guide/bug-bisect.rst」。 +因此如果時間允許,考慮立即進行適當的二分。有關如何詳細信息,請參閱“對迴歸的 +特別關照”部分和文檔 Documentation/translations/zh_CN/admin-guide/bug-bisect.rst 。 +如果成功二分的話,請將“罪魁禍首”的作者添加到收件人中;同時抄送所有在 +signed-off-by鏈中的人,您可以在提交消息的末尾找到。 -「報告僅在舊內核版本線中發生的問題」的參考 ------------------------------------------- +“報告僅在舊內核版本線中發生的問題”的參考 +---------------------------------------- 
-本節詳細介紹了如果無法用主線內核重現問題,但希望在舊版本線(又稱穩定版內核和 +本節詳細介紹了如果無法用主線內核重現問題,但希望在舊版本線(又稱穩定版內核和 長期支持內核)中修復問題時需要採取的步驟。 有些修復太複雜 ~~~~~~~~~~~~~~~ *請做好準備,接下來的幾個步驟可能無法在舊版本中解決問題:修復可能太大或 - 太冒險,無法移植到那裡。* + 太冒險,無法移植到那裏。* 即使是微小的、看似明顯的代碼變化,有時也會帶來新的、完全意想不到的問題。穩 定版和長期支持內核的維護者非常清楚這一點,因此他們只對這些內核進行符合 -「Documentation/translations/zh_TW/process/stable-kernel-rules.rst」中所列出的 +Documentation/translations/zh_CN/process/stable-kernel-rules.rst 中所列出的 規則的修改。 複雜或有風險的修改不符合條件,因此只能應用於主線。其他的修復很容易被回溯到 @@ -1220,7 +1229,7 @@ FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於 通用準備 ~~~~~~~~~~ - *執行上面「報告僅在舊內核版本線中發生的問題」一節中的前三個步驟。* + *執行上面“報告僅在舊內核版本線中發生的問題”一節中的前三個步驟。* 您需要執行本指南另一節中已經描述的幾個步驟。這些步驟將讓您: @@ -1242,21 +1251,21 @@ FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於 在許多情況下,你所處理的問題會發生在主線上,但已在主線上得到了解決。修正它 的提交也需要被回溯才能解決這個問題。這就是爲什麼你要搜索它或任何相關討論。 - * 首先嘗試在存放 Linux 內核原始碼的 Git 倉庫中找到修復。你可以通過 + * 首先嘗試在存放 Linux 內核源代碼的 Git 倉庫中找到修復。你可以通過 `kernel.org 上的網頁 `_ 或 `GitHub 上的鏡像 `_ 來實現;如果你 有一個本地克隆,你也可以在命令行用 ``git log --grep=`` 來搜索。 - 如果你找到了修復,請查看提交消息的尾部是否包含了類似這樣的「穩定版標籤」: + 如果你找到了修復,請查看提交消息的尾部是否包含了類似這樣的“穩定版標籤”: Cc: # 5.4+ 像上面這行,開發者標記了安全修復可以回傳到 5.4 及以後的版本。大多數情況 - 下,它會在兩周內被應用到那裡,但有時需要更長的時間。 + 下,它會在兩週內被應用到那裏,但有時需要更長的時間。 * 如果提交沒有告訴你任何東西,或者你找不到修復,請再找找關於這個問題的討論。 - 用你最喜歡的搜尋引擎搜索網絡,以及 `Linux kernel developers mailing + 用你最喜歡的搜索引擎搜索網絡,以及 `Linux kernel developers mailing list 內核開發者郵件列表 `_ 的檔案。也可以 閱讀上面的 `定位導致問題的內核區域` 一節,然後按照說明找到導致問題的子系 統:它的缺陷跟蹤器或郵件列表存檔中可能有你要找的答案。 @@ -1286,41 +1295,41 @@ FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於 爲什麼有些問題在報告後沒有任何回應或仍未解決?
=============================================== -當向 Linux 開發者報告問題時,要注意只有「高優先級的問題」(回歸、安全問題、嚴 +當向 Linux 開發者報告問題時,要注意只有“高優先級的問題”(迴歸、安全問題、嚴 重問題)才一定會得到解決。如果維護者或其他人都失敗了,Linus Torvalds 他自己 會確保這一點。他們和其他內核開發者也會解決很多其他問題。但是要知道,有時他 們也會不能或不願幫忙;有時甚至沒有人發報告給他們。 最好的解釋就是那些內核開發者常常是在業餘時間爲 Linux 內核做出貢獻。內核中的 -不少驅動程序都是由這樣的程式設計師編寫的,往往只是因爲他們想讓自己的硬體可以在 -自己喜歡的作業系統上使用。 +不少驅動程序都是由這樣的程序員編寫的,往往只是因爲他們想讓自己的硬件可以在 +自己喜歡的操作系統上使用。 -這些程式設計師大多數時候會很樂意修復別人報告的問題。但是沒有人可以強迫他們這樣 +這些程序員大多數時候會很樂意修復別人報告的問題。但是沒有人可以強迫他們這樣 做,因爲他們是自願貢獻的。 還有一些情況下,這些開發者真的很想解決一個問題,但卻不能解決:有時他們缺乏 -硬體編程文檔來解決問題。這種情況往往由於公開的文檔太簡陋,或者驅動程序是通 +硬件編程文檔來解決問題。這種情況往往由於公開的文檔太簡陋,或者驅動程序是通 過逆向工程編寫的。 -業餘開發者遲早也會不再關心某驅動。也許他們的測試硬體壞了,被更高級的玩意取 -代了,或者是太老了以至於只能在計算機博物館裡找到。有時開發者根本就不關心他 +業餘開發者遲早也會不再關心某驅動。也許他們的測試硬件壞了,被更高級的玩意取 +代了,或者是太老了以至於只能在計算機博物館裏找到。有時開發者根本就不關心他 們的代碼和 Linux 了,因爲在他們的生活中一些不同的東西變得更重要了。在某些情 況下,沒有人願意接手維護者的工作——也沒有人可以被強迫,因爲對 Linux 內核的貢 獻是自願的。然而被遺棄的驅動程序仍然存在於內核中:它們對人們仍然有用,刪除 -它們可能導致回歸。 +它們可能導致迴歸。 對於那些爲 Linux 內核工作而獲得報酬的開發者來說,情況並沒有什麼不同。這些人 現在貢獻了大部分的變更。但是他們的僱主遲早也會停止關注他們的代碼或者讓程序 -員專注於其他事情。例如,硬體廠商主要通過銷售新硬體來賺錢;因此,他們中的不 +員專注於其他事情。例如,硬件廠商主要通過銷售新硬件來賺錢;因此,他們中的不 少人並沒有投入太多時間和精力來維護他們多年前就停止銷售的東西的 Linux 內核驅 動。企業級 Linux 發行商往往持續維護的時間比較長,但在新版本中往往會把對老舊 -和稀有硬體的支持放在一邊,以限制範圍。一旦公司拋棄了一些代碼,往往由業餘貢 +和稀有硬件的支持放在一邊,以限制範圍。一旦公司拋棄了一些代碼,往往由業餘貢 獻者接手,但正如上面提到的:他們遲早也會放下代碼。 優先級是一些問題沒有被修復的另一個原因,因爲維護者相當多的時候是被迫設置這 些優先級的,因爲在 Linux 上工作的時間是有限的。對於業餘時間或者僱主給予他們 的開發人員用於上游內核維護工作的時間也是如此。有時維護人員也會被報告淹沒, -即使一個驅動程序幾乎完美地工作。爲了不被完全纏住,程式設計師可能別無選擇,只能 +即使一個驅動程序幾乎完美地工作。爲了不被完全纏住,程序員可能別無選擇,只能 對問題報告進行優先級排序而拒絕其中的一些報告。 不過這些都不用太過擔心,很多驅動都有積極的維護者,他們對儘可能多的解決問題 @@ -1330,8 +1339,32 @@ FLOSS 問題報告的人看,詢問他們的意見。同時徵求他們關於 結束語 ======= -與其他免費/自由&開源軟體(Free/Libre & Open Source Software,FLOSS)相比, -向 Linux 內核開發者報告問題是很難的:這個文檔的長度和複雜性以及字裡行間的內 +與其他免費/自由&開源軟件(Free/Libre & Open Source Software,FLOSS)相比, +向 Linux 內核開發者報告問題是很難的:這個文檔的長度和複雜性以及字裏行間的內 涵都說明了這一點。但目前就是這樣了。這篇文字的主要作者希望通過記錄現狀來爲 以後改善這種狀況打下一些基礎。 + +.. + end-of-content +.. + This English version of this document is maintained by Thorsten Leemhuis + . 
If you spot a typo or small mistake, feel free to + let him know directly and he'll fix it. For translation problems, please + contact with translators. You are free to do the same in a mostly informal + way if you want to contribute changes to the text, but for copyright + reasons please CC linux-doc@vger.kernel.org and "sign-off" your + contribution as Documentation/process/submitting-patches.rst outlines in + the section "Sign your work - the Developer's Certificate of Origin". +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. + diff --git a/Documentation/translations/zh_TW/admin-guide/reporting-regressions.rst b/Documentation/translations/zh_TW/admin-guide/reporting-regressions.rst new file mode 100644 index 0000000000..d7dcb2a265 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/reporting-regressions.rst @@ -0,0 +1,371 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) +.. 【重分發信息參見本文件結尾】 + +.. include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/reporting-regressions.rst + +:譯者: + + 吳想成 Wu XiangCheng + + +============ +報告迴歸問題 +============ + +“*我們拒絕出現迴歸*”是Linux內核開發的首要規則;Linux的發起者和領軍開發者Linus +Torvalds立下了此規則並確保它被落實。 + +本文檔描述了這條規則對用戶的意義,以及Linux內核開發模型如何確保解決所有被報告 +的迴歸;關於內核開發者如何處理的方面參見 Documentation/process/handling-regressions.rst 。 + + +本文重點(亦即“太長不看”) +========================== + +#. 
如果某程序在原先的Linux內核上運行良好,但在較新版本上效果更差、或者根本不 + 能用,那麼你就碰見迴歸問題了。注意,新內核需要使用類似配置編譯;更多相關細 + 節參見下方。 + +#. 按照 Documentation/translations/zh_CN/admin-guide/reporting-issues.rst 中 + 所說的報告你的問題,該文檔已經包含了所有關於迴歸的重要方面,爲了方便起見也 + 複製到了下面。兩個重點:在報告主題中使用“[REGRESSION]”開頭並抄送或轉發到 + `迴歸郵件列表 `_ + (regressions@lists.linux.dev)。 + +#. 可選但是建議:在發送或轉發報告時,指明該回歸發生的起點,以便Linux內核迴歸 + 追蹤機器人“regzbot”可以追蹤此問題:: + + #regzbot introduced v5.13..v5.14-rc1 + + +與用戶相關的所有Linux內核迴歸細節 +================================= + + +基本重點 +-------- + + +什麼是“迴歸”以及什麼是“無迴歸規則”? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +如果某程序/實例在原先的Linux內核上運行良好,但在較新版本上效果更差、或者根本 +不能用,那麼你就碰見迴歸問題了。“無迴歸規則”不允許出現這種情況。如果偶然發 +生了,導致問題的開發者應當迅速修復問題。 + +也就是說,若Linux 5.13中的WiFi驅動程序運行良好,但是在5.14版本上卻不能用、速 +度明顯變慢或出現錯誤,那就出現了迴歸。如果某正常工作的應用程序突然在新內核上 +出現不穩定,這也是迴歸;這些問題可能是由於procfs、sysfs或Linux提供給用戶空間 +軟件的許多其他接口之一的變化。但請記住,前述例子中的5.14需要使用類似於5.13的 +配置構建。這可以用 ``make olddefconfig`` 實現,詳細解釋見下。 + +注意本節第一句話中的“實例”:即使開發者需要遵循“無迴歸”規則,但仍可自由地改 +變內核的任何方面,甚至是導出到用戶空間的API或ABI,只要別破壞現有的應用程序或 +用例。 + +還需注意,“無迴歸”規則只限制內核提供給用戶空間的接口。它不適用於內核內部接 +口,比如一些外部開發的驅動程序用來插入鉤子到內核的模塊API。 + +如何報告迴歸? 
+~~~~~~~~~~~~~~ + +只需按照 Documentation/translations/zh_CN/admin-guide/reporting-issues.rst 中 +所說的報告你的問題,該文檔已經包含了要點。下面幾點概述了一下只在迴歸中重要的 +方面: + + * 在檢查可加入討論的現有報告時,別忘了搜索 `Linux迴歸郵件列表 + `_ 和 `regzbot網頁界面 + `_ 。 + + * 在報告主題的開頭加上“[REGRESSION]”。 + + * 在你的報告中明確最後一個正常工作的內核版本和首個出問題的版本。如若可能, + 用二分法嘗試找出導致迴歸的變更,更多細節見下。 + + * 記得把報告發到Linux迴歸郵件列表(regressions@lists.linux.dev)。 + + * 如果通過郵件報告迴歸,請抄送回歸列表。 + + * 如果你使用某些缺陷追蹤器報告迴歸,請通過郵件轉發已提交的報告到迴歸列表, + 並抄送維護者以及出問題的相關子系統的郵件列表。 + + 如果是穩定版或長期支持版系列(如v5.15.3…v5.15.5)的迴歸,請記得抄送 + `Linux穩定版郵件列表 `_ (stable@vger.kernel.org)。 + + 如果你成功地執行了二分,請抄送肇事提交的信息中所有簽了“Signed-off-by:”的人。 + +在抄送你的報告到列表時,也請記得通知前述的Linux內核迴歸追蹤機器人。只需在郵件 +中包含如下片段:: + + #regzbot introduced: v5.13..v5.14-rc1 + +Regzbot會就將你的郵件視爲在某個特定版本區間的迴歸報告。上例中即linux v5.13仍 +然正常,而Linux 5.14-rc1是首個您遇到問題的版本。如果你執行了二分以查找導致回 +歸的提交,請使用指定肇事提交的id代替:: + + #regzbot introduced: 1f2e3d4c5d + +添加這樣的“regzbot命令”對你是有好處的,它會確保報告不會被忽略。如果你省略了 +它,Linux內核的迴歸跟蹤者會把你的迴歸告訴regzbot,只要你發送了一個副本到迴歸 +郵件列表。但是迴歸跟蹤者只有一個人,有時不得不休息或甚至偶爾享受可以遠離電腦 +的時光(聽起來很瘋狂)。因此,依賴此人手動將回歸添加到 `已追蹤且尚未解決的 +Linux內核迴歸列表 `_ 和 +regzbot發送的每週迴歸報告,可能會出現延遲。 這樣的延誤會導致Linus Torvalds +在決定“繼續開發還是發佈新版本?”時忽略嚴重的迴歸。 + +真的修復了所有的迴歸嗎? +~~~~~~~~~~~~~~~~~~~~~~~~ + +幾乎所有都是,只要引起問題的變更(肇事提交)被可靠定位。也有些迴歸可以不用這 +樣,但通常是必須的。 + +誰需要找出迴歸的根本原因? +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +受影響代碼區域的開發者應該自行嘗試定位問題所在。但僅靠他們的努力往往是不可 +能做到的,很多問題只發生在開發者的無法接觸的其他特定外部環境中——例如特定的 +硬件平臺、固件、Linux發行版、系統的配置或應用程序。這就是爲什麼最終往往是報 +告者定位肇事提交;有時用戶甚至需要再運行額外測試以查明確切的根本原因。開發 +者應該提供建議和可能的幫助,以使普通用戶更容易完成該流程。 + +如何找到罪魁禍首? +~~~~~~~~~~~~~~~~~~ + +如 Documentation/translations/zh_CN/admin-guide/reporting-issues.rst (簡要) +和 Documentation/translations/zh_CN/admin-guide/bug-bisect.rst (詳細)中所 +述,執行二分。聽起來工作量很大,但大部分情況下很快就能找到罪魁禍首。如果這很 +困難或可靠地重現問題很耗時,請考慮與其他受影響的用戶合作,一起縮小搜索範圍。 + +當出現迴歸時我可以向誰尋求建議? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +發送郵件到迴歸郵件列表(regressions@lists.linux.dev)同時抄送Linux內核的迴歸 +跟蹤者(regressions@leemhuis.info);如果問題需要保密處理,可以省略列表。 + + +關於迴歸的更多細節 +------------------ + + +“無迴歸規則”的目標是什麼? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~ + +用戶應該放心升級內核版本,而不必擔心有程序可能崩潰。這符合內核開發者的利益, +可以使更新有吸引力:他們不希望用戶停留在停止維護或超過一年半的穩定/長期Linux +版本系列上。這也符合所有人的利益,因爲 `那些系列可能含有已知的缺陷、安全問題 +或其他後續版本已經修復的問題 +`_ 。 +此外,內核開發者希望使用戶測試最新的預發行版或常規發行版變得簡單而有吸引力。 +這同樣符合所有人的利益,如果新版本出來後很快就有相關報告,會使追蹤和修復問題 +更容易。 + +實際中“無迴歸”規則真的可行嗎? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +這不是句玩笑話,請見Linux創建者和主要開發人員Linus Torvalds在郵件列表中的許 +多發言,其中一些在 Documentation/process/handling-regressions.rst 中被引用。 + +此規則的例外情況極爲罕見;之前當開發者認爲某個特定的情況有必要援引例外時, +基本都被證明錯了。 + +誰來確保“無迴歸”被落實? +~~~~~~~~~~~~~~~~~~~~~~~~ + +照看和支撐樹的子系統維護者應該關心這一點——例如,Linus Torvalds之於主線, +Greg Kroah-Hartman等人之於各種穩定/長期系列。 + +他們都得到了別人的幫助,以確保迴歸報告不會被遺漏。其中之一是Thorsten +Leemhuis,他目前擔任Linux內核的“迴歸跟蹤者”;爲了做好這項工作,他使用了 +regzbot——Linux內核迴歸跟蹤機器人。所以這就是爲什麼要抄送或轉發你的報告到 +迴歸郵件列表來通知這些人,已經最好在你的郵件中包含“regzbot命令”來立即追蹤它。 + +迴歸通常多久能修復? +~~~~~~~~~~~~~~~~~~~~ + +開發者應該儘快修復任何被報告的迴歸,以提供及時爲受影響的用戶提供解決方案,並 +防止更多用戶遇到問題;然而,開發人員需要花足夠的時間和注意力確保迴歸修復不會 +造成額外的損害。 + +因此,答案取決於各種因素,如迴歸的影響、存在時長或出現於哪個Linux版本系列。 +但最終,大多數的迴歸應該在兩週內修復。 + +當問題可以通過升級某些軟件解決時,是迴歸嗎? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +基本都是。如果開發人員告訴您其他情況,請諮詢上述迴歸跟蹤者。 + +當新內核變慢或能耗增加,是迴歸嗎? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +是的,但有一些差別。在微型基準測試中變慢5%不太可能被視爲迴歸,除非它也會對 +廣泛基準測試的結果產生超過1%的影響。如果有疑問,請尋求建議。 + +當更新Linux時外部內核模塊崩潰了,是迴歸嗎? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +不,因爲“無迴歸”規則僅限於Linux內核提供給用戶空間的接口和服務。因此,它不包括 +構建或運行外部開發的內核模塊,因爲它們在內核空間中運行與掛進內核使用的內部接 +口偶爾會變化。 + +如何處理安全修復引起的迴歸? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +在極爲罕見的情況下,安全問題無法在不引起迴歸的情況下修復;這些修復都被放棄了, +因爲它們終究會引起問題。幸運的是這種兩難境地基本都可以避免,受影響區域的主要 +開發者以及Linus Torvalds本人通常都會努力在不引入迴歸的情況下解決安全問題。 + +如果你仍然面臨此種情況,請查看郵件列表檔案是否有人盡力避免過迴歸。如果沒有, +請報告它;如有疑問,請如上所述尋求建議。 + +當修復迴歸時不可避免會引入另一個,如何處理? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +很遺憾這種事確實會出現,但幸運的是並不經常出現;如果發生了,受影響代碼區的資 +深開發者應當調查該問題以找到避免迴歸的解決方法,至少避免它們的影響。如果你遇 +到這樣的情況,如上所述:檢查之前的討論是否有人已經盡了最大努力,如有疑問請尋 +求建議。 + +小提示:如果人們在每個開發週期中定期給出主線預發佈(即v5.15-rc1或-rc3)以供 +測試,則可以避免這種情況。爲了更好地解釋,可以設想一個在Linux v5.14和v5.15-rc1 +之間集成的更改,該更改導致了迴歸,但同時是應用於5.15-rc1的其他改進的強依賴。 +如果有人在5.15發佈之前就發現並報告了這個問題,那麼所有更改都可以直接撤銷,從 +而解決迴歸問題。而就在幾天或幾周後,此解決方案變成了不可能,因爲一些軟件可能 +已經開始依賴於後續更改之一:撤銷所有更改將導致上述用戶軟件出現迴歸,這是不可 +接受的。 + +若我所依賴的功能在數月前被移除了,是迴歸嗎? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +是的,但如前節所述,通常很難修復此類迴歸。因此需要逐案處理。這也是定期測試主 +線預發佈對所有人有好處的另一個原因。 + +如果我似乎是唯一受影響的人,是否仍適用“無迴歸”規則? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +適用,但僅限於實際使用:Linux開發人員希望能夠自由地取消那些只能在閣樓和博物 +館中找到的硬件的支持。 + +請注意,有時爲了取得進展,不得不出現迴歸——後者也是防止Linux停滯不前所必需 +的。因此如果迴歸所影響的用戶很少,那麼爲了他們和其他人更大的利益,還是讓事情 +過去吧。尤其是存在某種規避迴歸的簡單方法,例如更新一些軟件或者使用專門爲此目 +的創建的內核參數。 + +迴歸規則是否也適用於staging樹中的代碼? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +不,參見 `適用於所有staging代碼配置選項的幫助文本 +`_ , +其早已聲明:: + + 請注意:這些驅動正在積極開發中,可能無法正常工作,並可能包含會在不久的 + 將來發生變化的用戶接口。 + +雖然staging開發人員通常堅持“無迴歸”的原則,但有時爲了取得進展也會違背它。這就 +是爲什麼當staging樹的WiFi驅動被基本推倒重來時,有些用戶不得不處理迴歸(通常可 +以忽略)。 + +爲什麼較新版本必須“使用相似配置編譯”? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +因爲Linux內核開發人員有時會集成已知的會導致迴歸的變更,但使它們成爲可選的,並 +在內核的默認配置下禁用它們。這一技巧允許進步,否則“無迴歸”規則將導致停滯。 + +例如,試想一個新的可以阻止惡意軟件濫用某個內核的接口的安全特性,同時又需要滿足 +另一個很罕見的應用程序。上述的方法可使兩方都滿意:使用這些應用程序的人可以關閉 +新的安全功能,而其他不會遇到麻煩的人可以啓用它。 + +如何創建與舊內核相似的配置? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +用一個已知良好的內核啓動機器,並用 ``make olddefconfig`` 配置新版的Linux。這 +會讓內核的構建腳本從正在運行的內核中摘錄配置文件(“.config”文件),作爲即將編 +譯的新版本的基礎配置;同時將所有新的配置選項設爲默認值,以禁用可能導致迴歸的 +新功能。 + +如何報告在預編譯的普通內核中發現的迴歸? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +您需要確保新的內核是用與舊版相似的配置編譯(見上文),因爲那些構建它們的人可 +能啓用了一些已知的與新內核不兼容的特性。如有疑問,請向內核的提供者報告問題並 +尋求建議。 + + +用“regzbot”追蹤迴歸的更多信息 +----------------------------- + +什麼是迴歸追蹤?爲啥我需要關心它? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +像“無迴歸”這樣的規則需要有人來確保它們被遵守,否則會被有意/無意打破。歷史證 +明瞭這一點對於Linux內核開發也適用。這就是爲什麼Linux內核的迴歸跟蹤者Thorsten +Leemhuis,,和另一些人盡力關注所有的迴歸直到他們解決。他們從未爲此獲得報酬, +因此這項工作是在盡最大努力的基礎上完成的。 + +爲什麼/如何使用機器人追蹤Linux內核迴歸? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +由於Linux內核開發過程的分佈式和鬆散結構,完全手動跟蹤迴歸已經被證明是相當困難 +的。因此Linux內核的迴歸跟蹤者開發了regzbot來促進這項工作,其長期目標是儘可能爲 +所有相關人員自動化迴歸跟蹤。 + +Regzbot通過監視跟蹤的迴歸報告的回覆來工作。此外,它還查找用“Link:”標籤引用這 +些報告的補丁;對這些補丁的回覆也會被跟蹤。結合這些數據,可以很好地瞭解當前修 +復過程的狀態。 + +如何查看regzbot當前追蹤的迴歸? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +參見 `regzbot在線 `_ 。 + +何種問題可以由regzbot追蹤? +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +該機器人只爲了跟蹤迴歸,因此請不要讓regzbot涉及常規問題。但是對於Linux內核的 +迴歸跟蹤者來說,讓regzbot跟蹤嚴重問題也可以,如有關掛起、損壞數據或內部錯誤 +(Panic、Oops、BUG()、warning…)的報告。 + +如何修改被追蹤迴歸的相關信息? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +在直接或間接回復報告郵件時使用“regzbot命令”即可。最簡單的方法是:在“已發送”文 +件夾或郵件列表存檔中找到報告,然後使用郵件客戶端的“全部回覆”功能對其進行回覆。 +在該郵件中的獨立段落中可使用以下命令之一(即使用空行將這些命令中的一個或多個與 +其餘郵件文本分隔開)。 + + * 更新迴歸引入起點,例如在執行二分之後:: + + #regzbot introduced: 1f2e3d4c5d + + * 設置或更新標題:: + + #regzbot title: foo + + * 監視討論或bugzilla.kernel.org上有關討論或修復的工單:: + + #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * 標記一個有更多相關細節的地方,例如有關但主題不同的郵件列表帖子或缺陷追蹤器中的工單:: + + #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * 標記迴歸已失效:: + + #regzbot invalid: wasn't a regression, problem has always existed + +Regzbot還支持其他一些主要由開發人員或迴歸追蹤人員使用的命令。命令的更多細節請 +參考 `入門指南 `_ +和 `參考手冊 `_ 。 + +.. + 正文結束 +.. + 如本文件開頭所述,本文以GPL-2.0+或CC-BY-4.0許可發行。如您想僅在CC-BY-4.0許 + 可下重分發本文,請用“Linux內核開發者”作爲作者,並用如下鏈接作爲來源: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/translations/zh_CN/admin-guide/reporting-regressions.rst +.. 
+ 注意:本RST文件內容只有在來自Linux內核源代碼時是使用CC-BY-4.0許可的,因爲經 + 過處理的版本(如經內核的構建系統)可能包含來自使用更嚴格許可證的文件的內容。 + diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst index 65c8dd24c9..c0e9fc2476 100644 --- a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst +++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst @@ -19,17 +19,17 @@ Linux內核開發人員非常重視安全性。因此我們想知道何時發現 ----- 可以通過電子郵件聯繫Linux內核安全團隊。這是一個安全人員 -的私有列表,他們將幫助驗證錯誤報告並開發和發布修復程序。如果您已經有了一個 +的私有列表,他們將幫助驗證錯誤報告並開發和發佈修復程序。如果您已經有了一個 修復,請將其包含在您的報告中,這樣可以大大加快進程。安全團隊可能會從區域維護 -人員那裡獲得額外的幫助,以理解和修復安全漏洞。 +人員那裏獲得額外的幫助,以理解和修復安全漏洞。 與任何缺陷一樣,提供的信息越多,診斷和修復就越容易。如果您不清楚哪些信息有用, -請查看「Documentation/translations/zh_TW/admin-guide/reporting-issues.rst」中 +請查看“Documentation/translations/zh_CN/admin-guide/reporting-issues.rst”中 概述的步驟。任何利用漏洞的攻擊代碼都非常有用,未經報告者同意不會對外發布,除 非已經公開。 -請儘可能發送無附件的純文本電子郵件。如果所有的細節都藏在附件里,那麼就很難對 -一個複雜的問題進行上下文引用的討論。把它想像成一個 +請儘可能發送無附件的純文本電子郵件。如果所有的細節都藏在附件裏,那麼就很難對 +一個複雜的問題進行上下文引用的討論。把它想象成一個 :doc:`常規的補丁提交 <../process/submitting-patches>` (即使你還沒有補丁): 描述問題和影響,列出復現步驟,然後給出一個建議的解決方案,所有這些都是純文本的。 @@ -38,15 +38,15 @@ Linux內核開發人員非常重視安全性。因此我們想知道何時發現 安全列表不是公開渠道。爲此,請參見下面的協作。 -一旦開發出了健壯的補丁,發布過程就開始了。對公開的缺陷的修復會立即發布。 +一旦開發出了健壯的補丁,發佈過程就開始了。對公開的缺陷的修復會立即發佈。 -儘管我們傾向於在未公開缺陷的修復可用時即發布補丁,但應報告者或受影響方的請求, -這可能會被推遲到發布過程開始後的7日內,如果根據缺陷的嚴重性需要更多的時間, -則可額外延長到14天。推遲發布修復的唯一有效原因是爲了適應QA的邏輯和需要發布 +儘管我們傾向於在未公開缺陷的修復可用時即發佈補丁,但應報告者或受影響方的請求, +這可能會被推遲到發佈過程開始後的7日內,如果根據缺陷的嚴重性需要更多的時間, +則可額外延長到14天。推遲發佈修復的唯一有效原因是爲了適應QA的邏輯和需要發佈 協調的大規模部署。 雖然可能與受信任的個人共享受限信息以開發修復,但未經報告者許可,此類信息不會 -與修復程序一起發布或發布在任何其他披露渠道上。這包括但不限於原始錯誤報告和 +與修復程序一起發佈或發佈在任何其他披露渠道上。這包括但不限於原始錯誤報告和 後續討論(如有)、漏洞、CVE信息或報告者的身份。 換句話說,我們唯一感興趣的是修復缺陷。提交給安全列表的所有其他資料以及對報告 @@ -57,10 +57,10 @@ Linux內核開發人員非常重視安全性。因此我們想知道何時發現 對敏感缺陷(例如那些可能導致權限提升的缺陷)的修復可能需要與私有郵件列表 進行協調,以便分發供應商做好準備,在公開披露 -上游補丁時發布一個已修復的內核。發行版將需要一些時間來測試建議的補丁,通常 -會要求至少幾天的限制,而供應商更新發布更傾向於周二至周四。若合適,安全團隊 +上游補丁時發佈一個已修復的內核。發行版將需要一些時間來測試建議的補丁,通常 +會要求至少幾天的限制,而供應商更新發布更傾向於週二至週四。若合適,安全團隊 
可以協助這種協調,或者報告者可以從一開始就包括linux發行版。在這種情況下,請 -記住在電子郵件主題行前面加上「[vs]」,如linux發行版wiki中所述: +記住在電子郵件主題行前面加上“[vs]”,如linux發行版wiki中所述: 。 CVE分配 diff --git a/Documentation/translations/zh_TW/admin-guide/sysrq.rst b/Documentation/translations/zh_TW/admin-guide/sysrq.rst new file mode 100644 index 0000000000..4a08db00a4 --- /dev/null +++ b/Documentation/translations/zh_TW/admin-guide/sysrq.rst @@ -0,0 +1,281 @@ +.. include:: ../disclaimer-zh_TW.rst + +:Original: Documentation/admin-guide/sysrq.rst + +:翻譯: + + 黃軍華 Junhua Huang + +:校譯: + + 司延騰 Yanteng Si + +.. _tw_admin-guide_sysrq: + +Linux 魔法系統請求鍵駭客 +======================== + +針對 sysrq.c 的文檔說明 + +什麼是魔法 SysRq 鍵? +~~~~~~~~~~~~~~~~~~~~~ + +它是一個你可以輸入的具有魔法般的組合鍵。 +無論內核在做什麼,內核都會響應 SysRq 鍵的輸入,除非內核完全卡死。 + +如何使能魔法 SysRq 鍵? +~~~~~~~~~~~~~~~~~~~~~~~ + +在配置內核時,我們需要設置 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' 爲 'Y'。 +當運行一個編譯進 sysrq 功能的內核時,/proc/sys/kernel/sysrq 控制着被 +SysRq 鍵調用的功能許可。這個文件的默認值由 CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE +配置符號設定,文件本身默認設置爲 1。以下是 /proc/sys/kernel/sysrq 中可能的 +值列表: + + - 0 - 完全不使能 SysRq 鍵 + - 1 - 使能 SysRq 鍵的全部功能 + - >1 - 對於允許的 SysRq 鍵功能的比特掩碼(參見下面更詳細的功能描述):: + + 2 = 0x2 - 使能對控制檯日誌記錄級別的控制 + 4 = 0x4 - 使能對鍵盤的控制 (SAK, unraw) + 8 = 0x8 - 使能對進程的調試導出等 + 16 = 0x10 - 使能同步命令 + 32 = 0x20 - 使能重新掛載只讀 + 64 = 0x40 - 使能對進程的信號操作 (term, kill, oom-kill) + 128 = 0x80 - 允許重啓、斷電 + 256 = 0x100 - 允許讓所有實時任務變普通任務 + +你可以通過如下命令把值設置到這個文件中:: + + echo "number" >/proc/sys/kernel/sysrq + +這裏被寫入的 number 可以是 10 進制數,或者是帶着 0x 前綴的 16 進制數。 +CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE 必須是以 16 進制數寫入。 + +注意,``/proc/sys/kernel/sysrq`` 的值隻影響通過鍵盤觸發 SySRq 的調用,對於 +通過 ``/proc/sysrq-trigger`` 的任何操作調用都是允許的 +(通過具有系統權限的用戶)。 + +如何使用魔法 SysRq 鍵? +~~~~~~~~~~~~~~~~~~~~~~~ + +在 x86 架構上 + 你可以按下鍵盤組合鍵 :kbd:`ALT-SysRq-`。 + + .. 
note:: + 一些鍵盤可能沒有標識 'SySRq' 鍵。'SySRq' 鍵也被當做 'Print Screen'鍵。 + 同時有些鍵盤無法處理同時按下這麼多鍵,因此你可以先按下鍵盤 :kbd:`Alt` 鍵, + 然後按下鍵盤 :kbd:`SysRq` 鍵,再釋放鍵盤 :kbd:`SysRq` 鍵,之後按下鍵盤上命令鍵 + :kbd:``,最後釋放所有鍵。 + +在 SPARC 架構上 + 你可以按下鍵盤組合鍵 :kbd:`ALT-STOP-` 。 + +在串行控制檯(只針對 PC 類型的標準串口) + 你可以發一個 ``BREAK`` ,然後在 5 秒內發送一個命令鍵, + 發送 ``BREAK`` 兩次將被翻譯爲一個正常的 BREAK 操作。 + +在 PowerPC 架構上 + 按下鍵盤組合鍵 :kbd:`ALT - Print Screen` (或者 :kbd:`F13`) - :kbd:`<命令鍵>` 。 + :kbd:`Print Screen` (或者 :kbd:`F13`) - :kbd:`<命令鍵>` 或許也能實現。 + +在其他架構上 + 如果你知道其他架構的組合鍵,請告訴我,我可以把它們添加到這部分。 + +在所有架構上 + 寫一個字符到 /proc/sysrq-trigger 文件,例如:: + + echo t > /proc/sysrq-trigger + +這個命令鍵 :kbd:`` 是區分大小寫的。 + +什麼是命令鍵? +~~~~~~~~~~~~~~ + +=========== ================================================================ +命令鍵 功能 +=========== ================================================================ +``b`` 將立即重啓系統,不會同步或者卸載磁盤。 + +``c`` 將執行系統 crash,如果配置了系統 crashdump,將執行 crashdump。 + +``d`` 顯示所有持有的鎖。 + +``e`` 發送 SIGTERM 信號給所有進程,除了 init 進程。 + +``f`` 將調用 oom killer 殺掉一個過度佔用內存的進程,如果什麼任務都沒殺, + 也不會 panic。 + +``g`` kgdb 使用(內核調試器)。 + +``h`` 將會顯示幫助。(實際上除了這裏列舉的鍵,其他的都將顯示幫助, + 但是 ``h`` 容易記住):-) + +``i`` 發送 SIGKILL 給所有進程,除了 init 進程。 + +``j`` 強制性的 “解凍它” - 用於被 FIFREEZE ioctl 操作凍住的文件系統。 + +``k`` 安全訪問祕鑰(SAK)殺掉在當前虛擬控制檯的所有程序,注意:參考 + 下面 SAK 節重要論述。 + +``l`` 顯示所有活動 cpu 的棧回溯。 + +``m`` 將導出當前內存信息到你的控制檯。 + +``n`` 用於使所有實時任務變成普通任務。 + +``o`` 將關閉系統(如果配置和支持的話)。 + +``p`` 將導出當前寄存器和標誌位到控制檯。 + +``q`` 將導出每個 cpu 上所有已裝備的高精度定時器(不是完整的 + time_list 文件顯示的 timers)和所有時鐘事件設備的詳細信息。 + +``r`` 關閉鍵盤的原始模式,設置爲轉換模式。 + +``s`` 將嘗試同步所有的已掛載文件系統。 + +``t`` 將導出當前所有任務列表和它們的信息到控制檯。 + +``u`` 將嘗試重新掛載已掛載文件系統爲只讀。 + +``v`` 強制恢復幀緩存控制檯。 +``v`` 觸發 ETM 緩存導出 [ARM 架構特有] + +``w`` 導出處於不可中斷狀態(阻塞)的任務。 + +``x`` 在 ppc/powerpc 架構上用於 xmon 接口。 + 在 sparc64 架構上用於顯示全局的 PMU(性能監控單元)寄存器。 + 在 MIPS 架構上導出所有的 tlb 條目。 + +``y`` 顯示全局 cpu 寄存器 [SPARC-64 架構特有] + +``z`` 導出 ftrace 緩存信息 + +``0``-``9`` 設置控制檯日誌級別,該級別控制什麼樣的內核信息將被打印到你的 + 控制檯。(比如 ``0`` ,將使得只有緊急信息,像 PANICs or OOPSes + 才能到你的控制檯。) +=========== 
================================================================ + +好了,我能用他們做什麼呢? +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +嗯,當你的 X 服務端或者 svgalib 程序崩潰,unraw(r) 非原始模式命令鍵是非常 +方便的。 + +sak(k)(安全訪問祕鑰)在你嘗試登陸的同時,又想確保當前控制檯沒有可以獲取你的 +密碼的特洛伊木馬程序運行時是有用的。它會殺掉給定控制檯的所有程序,這樣你 +就可以確認當前的登陸提示程序是實際來自 init 進程的程序,而不是某些特洛伊 +木馬程序。 + +.. important:: + + 在其實際的形式中,在兼容 C2 安全標準的系統上,它不是一個真正的 SAK, + 它也不應該誤認爲此。 + +似乎其他人發現其可以作爲(系統終端聯機鍵)當你想退出一個程序, +同時不會讓你切換控制檯的方法。(比如,X 服務端或者 svgalib 程序) + +``reboot(b)`` 是個好方法,當你不能關閉機器時,它等同於按下"復位"按鈕。 + +``crash(c)`` 可以用於手動觸發一個 crashdump,當系統卡住時。 +注意當 crashdump 機制不可用時,這個只是觸發一個內核 crash。 + +``sync(s)`` 在拔掉可移動介質之前,或者在使用不提供優雅關機的 +救援 shell 之後很方便 -- 它將確保你的數據被安全地寫入磁盤。注意,在你看到 +屏幕上出現 "OK" 和 "Done" 之前,同步還沒有發生。 + +``umount(u)`` 可以用來標記文件系統正常卸載,從正在運行的系統角度來看,它們將 +被重新掛載爲只讀。這個重新掛載動作直到你看到 "OK" 和 "Done" 信息出現在屏幕上 +纔算完成。 + +日誌級別 ``0`` - ``9`` 用於當你的控制檯被大量的內核信息衝擊,你不想看見的時候。 +選擇 ``0`` 將禁止除了最緊急的內核信息外的所有的內核信息輸出到控制檯。(但是如果 +syslogd/klogd 進程是運行的,它們仍將被記錄。) + +``term(e)`` 和 ``kill(i)`` 用於當你有些有點失控的進程,你無法通過其他方式殺掉 +它們的時候,特別是它正在創建其他進程。 + +"just thaw ``it(j)`` " 用於當你的系統由於一個 FIFREEZE ioctl 調用而產生的文件 +系統凍結,而導致的不響應時。 + +有的時候 SysRq 鍵在使用它之後,看起來像是“卡住”了,我能做些什麼? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +這也會發生在我這,我發現輕敲鍵盤兩側的 shift、alt 和 control 鍵,然後再次敲擊 +一個無效的 SysRq 鍵序列可以解決問題。(比如,像鍵盤組合鍵 :kbd:`alt-sysrq-z` ) +切換到另一個虛擬控制檯(鍵盤操作 :kbd:`ALT+Fn` ),然後再切回來應該也有幫助。 + +我敲擊了 SysRq 鍵,但像是什麼都沒發生,發生了什麼錯誤? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +有一些鍵盤對於 SysRq 鍵設置了不同的鍵值,而不是提前定義的 99 +(查看在 ``include/uapi/linux/input-event-codes.h`` 文件中 ``KEY_SYSRQ`` 的定義) +或者就根本沒有 SysRq 鍵。在這些場景下,執行 ``showkey -s`` 命令來找到一個合適 +的掃描碼序列,然後使用 ``setkeycodes 99`` 命令映射這個序列值到通用 +的 SysRq 鍵編碼上(比如 ``setkeycodes e05b 99`` )。最好將這個命令放在啓動腳本 +中。 +哦,順便說一句,你十秒鐘不輸入任何東西就將退出 “showkey”。 + +我想添加一個 SysRq 鍵事件到一個模塊中,如何去做呢? 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +爲了註冊一個基礎函數到這個表中,首先你必須包含 ``include/linux/sysrq.h`` 頭 +文件,這個頭文件定義了你所需要的所有東西。然後你必須創建一個 ``sysrq_key_op`` +結構體,然後初始化它,使用如下內容,A) 你將使用的這個鍵的處理函數, B) 一個 +help_msg 字符串,在 SysRq 鍵打印幫助信息時將打印出來,C) 一個 action_msg 字 +符串,就在你的處理函數調用前打印出來。你的處理函數必須符合在 'sysrq.h' 文件中 +的函數原型。 + +在 ``sysrq_key_op`` 結構體被創建後,你可以調用內核函數 +``register_sysrq_key(int key, const struct sysrq_key_op *op_p);``, +該函數在表中的 'key' 對應位置內容是空的情況下,將通過 ``op_p`` 指針註冊這個操作 +函數到表中 'key' 對應位置上。在模塊卸載的時候,你必須調用 +``unregister_sysrq_key(int key, const struct sysrq_key_op *op_p)`` 函數,該函數 +只有在當前該鍵對應的處理函數被註冊到了 'key' 對應位置時,纔會移除 'op_p' 指針 +對應的鍵值操作函數。這是爲了防止在你註冊之後,該位置被改寫的情況。 + +魔法 SysRq 鍵系統的工作原理是將鍵對應操作函數註冊到鍵的操作查找表, +該表定義在 'drivers/tty/sysrq.c' 文件中。 +該鍵表有許多在編譯時候就註冊進去的操作函數,但是是可變的。 +並且有兩個函數作爲操作該表的接口被導出:: + + register_sysrq_key 和 unregister_sysrq_key. + +當然,永遠不要在表中留下無效指針,即,當你的模塊存在調用 register_sysrq_key() +函數,它一定要調用 unregister_sysrq_key() 來清除它使用過的 SysRq 鍵表條目。 +表中的空指針是安全的。:) + +如果對於某種原因,在 handle_sysrq 調用的處理函數中,你認爲有必要調用 +handle_sysrq 函數時,你必須意識到當前你處於一個鎖中(你同時也處於一個中斷處理 +函數中,這意味着不能睡眠)。所以這時你必須使用 ``__handle_sysrq_nolock`` 替代。 + +當我敲擊一個 SysRq 組合鍵時,只有標題打印出現在控制檯? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SysRq 鍵的輸出和所有其他控制檯輸出一樣,受制於控制檯日誌級別控制。 +這意味着,如果內核以發行版內核中常見的 "quiet" 方式啓動,則輸出可能不會出現在實際 +的控制檯上,即使它會出現在 dmesg 緩存中,也可以通過 dmesg 命令和 ``/proc/kmsg`` +文件的消費訪問到。作爲一個特例,來自 sysrq 命令的標題行將被傳遞給所有控制檯 +使用者,就好像當前日誌級別是最大的一樣。如果只發出標題頭,則幾乎可以肯定內核日誌 +級別太低。如果你需要控制檯上的輸出,那麼你將需要臨時提高控制檯日誌級別,通過使用 +鍵盤組合鍵 :kbd:`alt-sysrq-8` 或者:: + + echo 8 > /proc/sysrq-trigger + +在觸發了你感興趣的 SysRq 鍵命令後,記得恢復日誌級別到正常情況。 + +我有很多問題時,可以請教誰? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +請教在內核郵件列表上的人,郵箱: + linux-kernel@vger.kernel.org + +致謝 +~~~~ + +- Mydraal 撰寫了該文件 +- Adam Sulmicki 進行了更新 +- Jeremy M.
Dolan 在 2001/01/28 10:15:59 進行了更新 +- Crutcher Dunnavant 添加鍵註冊部分 + diff --git a/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst b/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst index ebe3812ead..47629f6b05 100644 --- a/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst +++ b/Documentation/translations/zh_TW/admin-guide/tainted-kernels.rst @@ -9,27 +9,27 @@ 吳想成 Wu XiangCheng 胡皓文 Hu Haowen -受汙染的內核 +受污染的內核 ------------- -當發生一些在稍後調查問題時可能相關的事件時,內核會將自己標記爲「受汙染 -(tainted)」的。不用太過擔心,大多數情況下運行受汙染的內核沒有問題;這些信息 -主要在有人想調查某個問題時才有意義的,因爲問題的真正原因可能是導致內核受汙染 -的事件。這就是爲什麼來自受汙染內核的缺陷報告常常被開發人員忽略,因此請嘗試用 -未受汙染的內核重現問題。 +當發生一些在稍後調查問題時可能相關的事件時,內核會將自己標記爲“受污染 +(tainted)”的。不用太過擔心,大多數情況下運行受污染的內核沒有問題;這些信息 +主要在有人想調查某個問題時纔有意義的,因爲問題的真正原因可能是導致內核受污染 +的事件。這就是爲什麼來自受污染內核的缺陷報告常常被開發人員忽略,因此請嘗試用 +未受污染的內核重現問題。 -請注意,即使在您消除導致汙染的原因(亦即卸載專有內核模塊)之後,內核仍將保持 -汙染狀態,以表示內核仍然不可信。這也是爲什麼內核在注意到內部問題(「kernel -bug」)、可恢復錯誤(「kernel oops」)或不可恢復錯誤(「kernel panic」)時會列印 -受汙染狀態,並將有關此的調試信息寫入日誌 ``dmesg`` 輸出。也可以通過 -``/proc/`` 中的文件在運行時檢查受汙染的狀態。 +請注意,即使在您消除導致污染的原因(亦即卸載專有內核模塊)之後,內核仍將保持 +污染狀態,以表示內核仍然不可信。這也是爲什麼內核在注意到內部問題(“kernel +bug”)、可恢復錯誤(“kernel oops”)或不可恢復錯誤(“kernel panic”)時會打印 +受污染狀態,並將有關此的調試信息寫入日誌 ``dmesg`` 輸出。也可以通過 +``/proc/`` 中的文件在運行時檢查受污染的狀態。 -BUG、Oops或Panics消息中的汙染標誌 +BUG、Oops或Panics消息中的污染標誌 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -在頂部以「CPU:」開頭的一行中可以找到受汙染的狀態;內核是否受到汙染和原因會顯示 -在進程ID(「PID:」)和觸發事件命令的縮寫名稱(「Comm:」)之後:: +在頂部以“CPU:”開頭的一行中可以找到受污染的狀態;內核是否受到污染和原因會顯示 +在進程ID(“PID:”)和觸發事件命令的縮寫名稱(“Comm:”)之後:: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 Oops: 0002 [#1] SMP PTI @@ -38,27 +38,27 @@ BUG、Oops或Panics消息中的汙染標誌 RIP: 0010:my_oops_init+0x13/0x1000 [kpanic] [...] 
-如果內核在事件發生時沒有被汙染,您將在那裡看到「Not-tainted:」;如果被汙染,那 -麼它將是「Tainted:」以及字母或空格。在上面的例子中,它看起來是這樣的:: +如果內核在事件發生時沒有被污染,您將在那裏看到“Not-tainted:”;如果被污染,那 +麼它將是“Tainted:”以及字母或空格。在上面的例子中,它看起來是這樣的:: Tainted: P W O 下表解釋了這些字符的含義。在本例中,由於加載了專有模塊( ``P`` ),出現了 警告( ``W`` ),並且加載了外部構建的模塊( ``O`` ),所以內核早些時候受到 -了汙染。要解碼其他字符,請使用下表。 +了污染。要解碼其他字符,請使用下表。 -解碼運行時的汙染狀態 +解碼運行時的污染狀態 ~~~~~~~~~~~~~~~~~~~~~ -在運行時,您可以通過讀取 ``cat /proc/sys/kernel/tainted`` 來查詢受汙染狀態。 -如果返回 ``0`` ,則內核沒有受到汙染;任何其他數字都表示受到汙染的原因。解碼 +在運行時,您可以通過讀取 ``cat /proc/sys/kernel/tainted`` 來查詢受污染狀態。 +如果返回 ``0`` ,則內核沒有受到污染;任何其他數字都表示受到污染的原因。解碼 這個數字的最簡單方法是使用腳本 ``tools/debugging/kernel-chktaint`` ,您的 發行版可能會將其作爲名爲 ``linux-tools`` 或 ``kernel-tools`` 的包的一部分提 供;如果沒有,您可以從 `git.kernel.org `_ 網站下載此腳本並用 ``sh kernel-chktaint`` 執行,它會在上面引用的日誌中有類似 -語句的機器上列印這樣的內容:: +語句的機器上打印這樣的內容:: Kernel is Tainted for following reasons: * Proprietary module was loaded (#0) @@ -69,19 +69,19 @@ BUG、Oops或Panics消息中的汙染標誌 a more details explanation of the various taint flags. Raw taint value as int/string: 4609/'P W O ' -你也可以試著自己解碼這個數字。如果內核被汙染的原因只有一個,那麼這很簡單, +你也可以試着自己解碼這個數字。如果內核被污染的原因只有一個,那麼這很簡單, 在本例中您可以通過下表找到數字。如果你需要解碼有多個原因的數字,因爲它是一 -個位域(bitfield),其中每個位表示一個特定類型的汙染的存在或不存在,最好讓 +個位域(bitfield),其中每個位表示一個特定類型的污染的存在或不存在,最好讓 前面提到的腳本來處理。但是如果您需要快速看一下,可以使用這個shell命令來檢查 設置了哪些位:: $ for i in $(seq 18); do echo $(($i-1)) $(($(cat /proc/sys/kernel/tainted)>>($i-1)&1));done -汙染狀態代碼表 +污染狀態代碼表 ~~~~~~~~~~~~~~~ === ===== ====== ======================================================== - 位 日誌 數字 內核被汙染的原因 + 位 日誌 數字 內核被污染的原因 === ===== ====== ======================================================== 0 G/P 1 已加載專用模塊 1 _/F 2 模塊被強制加載 @@ -89,23 +89,23 @@ BUG、Oops或Panics消息中的汙染標誌 3 _/R 8 模塊被強制卸載 4 _/M 16 處理器報告了機器檢測異常(MCE) 5 _/B 32 引用了錯誤的頁或某些意外的頁標誌 - 6 _/U 64 用戶空間應用程式請求的汙染 + 6 _/U 64 用戶空間應用程序請求的污染 7 _/D 128 內核最近死機了,即曾出現OOPS或BUG 8 _/A 256 ACPI表被用戶覆蓋 9 _/W 512 內核發出警告 10 _/C 1024 已加載staging驅動程序 - 11 _/I 2048 已應用平台固件缺陷的解決方案 - 12 _/O 4096 已加載外部構建(「樹外」)模塊 + 11 _/I 2048 已應用平臺固件缺陷的解決方案 + 12 _/O 4096 已加載外部構建(“樹外”)模塊 13 _/E 8192 
已加載未簽名的模塊 14 _/L 16384 發生軟鎖定 15 _/K 32768 內核已實時打補丁 - 16 _/X 65536 備用汙染,爲發行版定義並使用 + 16 _/X 65536 備用污染,爲發行版定義並使用 17 _/T 131072 內核是用結構隨機化插件構建的 === ===== ====== ======================================================== -註:字符 ``_`` 表示空白,以便於閱讀表。 +注:字符 ``_`` 表示空白,以便於閱讀表。 -汙染的更詳細解釋 +污染的更詳細解釋 ~~~~~~~~~~~~~~~~~ 0) ``G`` 加載的所有模塊都有GPL或兼容許可證, ``P`` 加載了任何專有模塊。 @@ -115,14 +115,14 @@ BUG、Oops或Panics消息中的汙染標誌 1) ``F`` 任何模塊被 ``insmod -f`` 強制加載, ``' '`` 所有模塊正常加載。 - 2) ``S`` 內核運行在不合規範的處理器或系統上:硬體已運行在不受支持的配置中, - 因此無法保證正確執行。內核將被汙染,例如: + 2) ``S`` 內核運行在不合規範的處理器或系統上:硬件已運行在不受支持的配置中, + 因此無法保證正確執行。內核將被污染,例如: - 在x86上:PAE是通過intel CPU(如Pentium M)上的forcepae強制執行的,這些 CPU不報告PAE,但可能有功能實現,SMP內核在非官方支持的SMP Athlon CPU上 運行,MSR被暴露到用戶空間中。 - 在arm上:在某些CPU(如Keystone 2)上運行的內核,沒有啓用某些內核特性。 - - 在arm64上:CPU之間存在不匹配的硬體特性,引導加載程序以不同的模式引導CPU。 + - 在arm64上:CPU之間存在不匹配的硬件特性,引導加載程序以不同的模式引導CPU。 - 某些驅動程序正在被用在不受支持的體系結構上(例如x86_64以外的其他系統 上的scsi/snic,非x86/x86_64/itanium上的scsi/ips,已經損壞了arm64上 irqchip/irq-gic的固件設置…)。 @@ -131,22 +131,22 @@ BUG、Oops或Panics消息中的汙染標誌 4) ``M`` 任何處理器報告了機器檢測異常, ``' '`` 未發生機器檢測異常。 - 5) ``B`` 頁面釋放函數發現錯誤的頁面引用或某些意外的頁面標誌。這表示硬體問題 - 或內核錯誤;日誌中應該有其他信息指示發生此汙染的原因。 + 5) ``B`` 頁面釋放函數發現錯誤的頁面引用或某些意外的頁面標誌。這表示硬件問題 + 或內核錯誤;日誌中應該有其他信息指示發生此污染的原因。 - 6) ``U`` 用戶或用戶應用程式特意請求設置受汙染標誌,否則應爲 ``' '`` 。 + 6) ``U`` 用戶或用戶應用程序特意請求設置受污染標誌,否則應爲 ``' '`` 。 7) ``D`` 內核最近死機了,即出現了OOPS或BUG。 8) ``A`` ACPI表被重寫。 - 9) ``W`` 內核之前已發出過警告(儘管有些警告可能會設置更具體的汙染標誌)。 + 9) ``W`` 內核之前已發出過警告(儘管有些警告可能會設置更具體的污染標誌)。 10) ``C`` 已加載staging驅動程序。 - 11) ``I`` 內核正在處理平台固件(BIOS或類似軟體)中的嚴重錯誤。 + 11) ``I`` 內核正在處理平臺固件(BIOS或類似軟件)中的嚴重錯誤。 - 12) ``O`` 已加載外部構建(「樹外」)模塊。 + 12) ``O`` 已加載外部構建(“樹外”)模塊。 13) ``E`` 在支持模塊簽名的內核中加載了未簽名的模塊。 @@ -154,8 +154,8 @@ BUG、Oops或Panics消息中的汙染標誌 15) ``K`` 內核已經實時打了補丁。 - 16) ``X`` 備用汙染,由Linux發行版定義和使用。 + 16) ``X`` 備用污染,由Linux發行版定義和使用。 17) ``T`` 內核構建時使用了randstruct插件,它可以有意生成非常不尋常的內核結構 - 布局(甚至是性能病態的布局),這在調試時非常有用。於構建時設置。 + 佈局(甚至是性能病態的佈局),這在調試時非常有用。於構建時設置。 diff --git a/Documentation/translations/zh_TW/admin-guide/unicode.rst 
b/Documentation/translations/zh_TW/admin-guide/unicode.rst index 7908b369b8..a2b48b5d0a 100644 --- a/Documentation/translations/zh_TW/admin-guide/unicode.rst +++ b/Documentation/translations/zh_TW/admin-guide/unicode.rst @@ -37,15 +37,15 @@ IBMPC_MAP IBM code page 437 ESC ( U USER_MAP User defined ESC ( K =============== =============================== ================ -特別是 ESC ( U 不再是「直通字體」,因爲字體可能與IBM字符集完全不同。 +特別是 ESC ( U 不再是“直通字體”,因爲字體可能與IBM字符集完全不同。 例如,即使加載了一個Latin-1字體,也允許使用塊圖形(block graphics)。 請注意,儘管這些代碼與ISO 2022類似,但這些代碼及其用途都與ISO 2022不匹配; Linux有兩個八位代碼(G0和G1),而ISO 2022有四個七位代碼(G0-G3)。 -根據Unicode標準/ISO 10646,U+F000到U+F8FF被保留用於作業系統範圍內的分配 -(Unicode標準將其稱爲「團體區域(Corporate Zone)」,因爲這對於Linux是不準確 -的,所以我們稱之爲「Linux區域」)。選擇U+F000作爲起點,因爲它允許直接映射 +根據Unicode標準/ISO 10646,U+F000到U+F8FF被保留用於操作系統範圍內的分配 +(Unicode標準將其稱爲“團體區域(Corporate Zone)”,因爲這對於Linux是不準確 +的,所以我們稱之爲“Linux區域”)。選擇U+F000作爲起點,因爲它允許直接映射 區域以2的大倍數開始(以防需要1024或2048個字符的字體)。這就留下U+E000到 U+EFFF作爲最終用戶區。 @@ -87,7 +87,7 @@ U+F813 KEYBOARD SYMBOL SOLID APPLE 克林貢(Klingon)語支持 ------------------------ -1996年,Linux是世界上第一個添加對人工語言克林貢支持的作業系統,克林貢是由 +1996年,Linux是世界上第一個添加對人工語言克林貢支持的操作系統,克林貢是由 Marc Okrand爲《星際迷航》電視連續劇創造的。這種編碼後來被徵募Unicode註冊表 (ConScript Unicode Registry,CSUR)採用,並建議(但最終被拒絕)納入Unicode 平面一。不過,它仍然是Linux區域中的Linux/CSUR私有分配。 diff --git a/Documentation/translations/zh_TW/arch/arm/Booting b/Documentation/translations/zh_TW/arch/arm/Booting new file mode 100644 index 0000000000..a5375f262d --- /dev/null +++ b/Documentation/translations/zh_TW/arch/arm/Booting @@ -0,0 +1,176 @@ +Chinese translated version of Documentation/arch/arm/booting.rst + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. 
+ +Maintainer: Russell King +Chinese maintainer: Fu Wei +--------------------------------------------------------------------- +Documentation/arch/arm/booting.rst 的中文翻譯 + +如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文 +交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻 +譯存在問題,請聯繫中文版維護者。 + +英文版維護者: Russell King +中文版維護者: 傅煒 Fu Wei +中文版翻譯者: 傅煒 Fu Wei +中文版校譯者: 傅煒 Fu Wei + +以下爲正文 +--------------------------------------------------------------------- + + 啓動 ARM Linux + ============== + +作者:Russell King +日期:2002年5月18日 + +以下文檔適用於 2.4.18-rmk6 及以上版本。 + +爲了啓動 ARM Linux,你需要一個引導裝載程序(boot loader), +它是一個在主內核啓動前運行的一個小程序。引導裝載程序需要初始化各種 +設備,並最終調用 Linux 內核,將信息傳遞給內核。 + +從本質上講,引導裝載程序應提供(至少)以下功能: + +1、設置和初始化 RAM。 +2、初始化一個串口。 +3、檢測機器的類型(machine type)。 +4、設置內核標籤列表(tagged list)。 +5、調用內核映像。 + + +1、設置和初始化 RAM +------------------- + +現有的引導加載程序: 強制 +新開發的引導加載程序: 強制 + +引導裝載程序應該找到並初始化系統中所有內核用於保持系統變量數據的 RAM。 +這個操作的執行是設備依賴的。(它可能使用內部算法來自動定位和計算所有 +RAM,或可能使用對這個設備已知的 RAM 信息,還可能使用任何引導裝載程序 +設計者想到的匹配方法。) + + +2、初始化一個串口 +----------------------------- + +現有的引導加載程序: 可選、建議 +新開發的引導加載程序: 可選、建議 + +引導加載程序應該初始化並使能一個目標板上的串口。這允許內核串口驅動 +自動檢測哪個串口用於內核控制檯。(一般用於調試或與目標板通信。) + +作爲替代方案,引導加載程序也可以通過標籤列表傳遞相關的'console=' +選項給內核以指定某個串口,而串口數據格式的選項在以下文檔中描述: + + Documentation/admin-guide/kernel-parameters.rst。 + + +3、檢測機器類型 +-------------------------- + +現有的引導加載程序: 可選 +新開發的引導加載程序: 強制 + +引導加載程序應該通過某些方式檢測自身所處的機器類型。這是一個硬件 +代碼或通過查看所連接的硬件用某些算法得到,這些超出了本文檔的範圍。 +引導加載程序最終必須能提供一個 MACH_TYPE_xxx 值給內核。 +(詳見 linux/arch/arm/tools/mach-types )。 + +4、設置啓動數據 +------------------ + +現有的引導加載程序: 可選、強烈建議 +新開發的引導加載程序: 強制 + +引導加載程序必須提供標籤列表或者 dtb 映像以傳遞配置數據給內核。啓動 +數據的物理地址通過寄存器 r2 傳遞給內核。 + +4a、設置內核標籤列表 +-------------------------------- + +bootloader 必須創建和初始化內核標籤列表。一個有效的標籤列表以 +ATAG_CORE 標籤開始,並以 ATAG_NONE 標籤結束。ATAG_CORE 標籤可以是 +空的,也可以是非空。一個空 ATAG_CORE 標籤其 size 域設置爲 +‘2’(0x00000002)。ATAG_NONE 標籤的 size 域必須設置爲零。 + +在列表中可以保存任意數量的標籤。對於一個重複的標籤是追加到之前標籤 +所攜帶的信息之後,還是會覆蓋原來的信息,是未定義的。某些標籤的行爲 +是前者,其他是後者。 + +bootloader 必須傳遞一個系統內存的位置和最小值,以及根文件系統位置。 +因此,最小的標籤列表如下所示: + + +-----------+ +基地址 -> | ATAG_CORE | 
| + +-----------+ | + | ATAG_MEM | | 地址增長方向 + +-----------+ | + | ATAG_NONE | | + +-----------+ v + +標籤列表應該保存在系統的 RAM 中。 + +標籤列表必須置於內核自解壓和 initrd'bootp' 程序都不會覆蓋的內存區。 +建議放在 RAM 的頭 16KiB 中。 + +4b、設置設備樹 +------------------------- + +bootloader 必須以 64bit 地址對齊的形式加載一個設備樹映像(dtb)到系統 +RAM 中,並用啓動數據初始化它。dtb 格式在文檔 +https://www.devicetree.org/specifications/ 中。內核將會在 +dtb 物理地址處查找 dtb 魔數值(0xd00dfeed),以確定 dtb 是否已經代替 +標籤列表被傳遞進來。 + +bootloader 必須傳遞一個系統內存的位置和最小值,以及根文件系統位置。 +dtb 必須置於內核自解壓不會覆蓋的內存區。建議將其放置於 RAM 的頭 16KiB +中。但是不可將其放置於“0”物理地址處,因爲內核認爲:r2 中爲 0,意味着 +沒有標籤列表和 dtb 傳遞過來。 + +5、調用內核映像 +--------------------------- + +現有的引導加載程序: 強制 +新開發的引導加載程序: 強制 + +調用內核映像 zImage 有兩個選擇。如果 zImage 保存在 flash 中,且是爲了 +在 flash 中直接運行而被正確鏈接的。這樣引導加載程序就可以在 flash 中 +直接調用 zImage。 + +zImage 也可以被放在系統 RAM(任意位置)中被調用。注意:內核使用映像 +基地址的前 16KB RAM 空間來保存頁表。建議將映像置於 RAM 的 32KB 處。 + +對於以上任意一種情況,都必須符合以下啓動狀態: + +- 停止所有 DMA 設備,這樣內存數據就不會因爲虛假網絡包或磁盤數據而被破壞。 + 這可能可以節省你許多的調試時間。 + +- CPU 寄存器配置 + r0 = 0, + r1 = (在上面 3 中獲取的)機器類型碼。 + r2 = 標籤列表在系統 RAM 中的物理地址,或 + 設備樹塊(dtb)在系統 RAM 中的物理地址 + +- CPU 模式 + 所有形式的中斷必須被禁止 (IRQs 和 FIQs) + CPU 必須處於 SVC 模式。(對於 Angel 調試有特例存在) + +- 緩存,MMUs + MMU 必須關閉。 + 指令緩存開啓或關閉都可以。 + 數據緩存必須關閉。 + +- 引導加載程序應該通過直接跳轉到內核映像的第一條指令來調用內核映像。 + + 對於支持 ARM 指令集的 CPU,跳入內核入口時必須處在 ARM 狀態,即使 + 對於 Thumb-2 內核也是如此。 + + 對於僅支持 Thumb 指令集的 CPU,比如 Cortex-M 系列的 CPU,跳入 + 內核入口時必須處於 Thumb 狀態。 + diff --git a/Documentation/translations/zh_TW/arch/arm/kernel_user_helpers.txt b/Documentation/translations/zh_TW/arch/arm/kernel_user_helpers.txt new file mode 100644 index 0000000000..4c0bff97af --- /dev/null +++ b/Documentation/translations/zh_TW/arch/arm/kernel_user_helpers.txt @@ -0,0 +1,285 @@ +Chinese translated version of Documentation/arch/arm/kernel_user_helpers.rst + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help.
Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Nicolas Pitre + Dave Martin +Chinese maintainer: Fu Wei +--------------------------------------------------------------------- +Documentation/arch/arm/kernel_user_helpers.rst 的中文翻譯 + +如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文 +交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻 +譯存在問題,請聯繫中文版維護者。 +英文版維護者: Nicolas Pitre + Dave Martin +中文版維護者: 傅煒 Fu Wei +中文版翻譯者: 傅煒 Fu Wei +中文版校譯者: 宋冬生 Dongsheng Song + 傅煒 Fu Wei + + +以下爲正文 +--------------------------------------------------------------------- +內核提供的用戶空間輔助代碼 +========================= + +在內核內存空間的固定地址處,有一個由內核提供並可從用戶空間訪問的代碼 +段。它用於向用戶空間提供因在許多 ARM CPU 中未實現的特性和/或指令而需 +內核提供幫助的某些操作。這些代碼直接在用戶模式下執行的想法是爲了獲得 +最佳效率,但那些與內核計數器聯繫過於緊密的部分,則被留給了用戶庫實現。 +事實上,此代碼甚至可能因不同的 CPU 而異,這取決於其可用的指令集或它 +是否爲 SMP 系統。換句話說,內核保留在不作出警告的情況下根據需要更改 +這些代碼的權利。只有本文檔描述的入口及其結果是保證穩定的。 + +這與完全成熟的 VDSO 實現不同(但兩者並不衝突),儘管如此,VDSO 可阻止 +某些通過常量高效跳轉到那些代碼段的彙編技巧。且由於那些代碼段在返回用戶 +代碼前僅使用少量的代碼週期,則一個 VDSO 間接遠程調用將會在這些簡單的 +操作上增加一個可測量的開銷。 + +在對那些擁有原生支持的新型處理器進行代碼優化時,僅在已爲其他操作使用 +了類似的新增指令,而導致二進制結果已與早期 ARM 處理器不兼容的情況下, +用戶空間才應繞過這些輔助代碼,並在內聯函數中實現這些操作(無論是通過 +編譯器在代碼中直接放置,還是作爲庫函數調用實現的一部分)。也就是說, +如果你編譯的代碼不會爲了其他目的使用新指令,則不要僅爲了避免使用這些 +內核輔助代碼,導致二進制程序無法在早期處理器上運行。 + +新的輔助代碼可能隨着時間的推移而增加,所以新內核中的某些輔助代碼在舊 +內核中可能不存在。因此,程序必須在對任何輔助代碼調用假設是安全之前, +檢測 __kuser_helper_version 的值(見下文)。理想情況下,這種檢測應該 +只在進程啓動時執行一次;如果內核版本不支持所需輔助代碼,則該進程可儘早 +中止執行。 + +kuser_helper_version +-------------------- + +位置: 0xffff0ffc + +參考聲明: + + extern int32_t __kuser_helper_version; + +定義: + + 這個區域包含了當前運行內核實現的輔助代碼版本號。用戶空間可以通過讀 + 取此版本號以確定特定的輔助代碼是否存在。 + +使用範例: + +#define __kuser_helper_version (*(int32_t *)0xffff0ffc) + +void check_kuser_version(void) +{ + if (__kuser_helper_version < 2) { + fprintf(stderr, "can't do atomic operations, kernel too old\n"); + abort(); + } +} + +注意: + + 用戶空間可以假設這個域的值不會在任何單個進程的生存期內改變。也就 + 是說,這個域可以僅在庫的初始化階段或進程啓動階段讀取一次。 + +kuser_get_tls +------------- + +位置: 0xffff0fe0 + +參考原型: + + void * __kuser_get_tls(void); + +輸入: + 
+ lr = 返回地址 + +輸出: + + r0 = TLS 值 + +被篡改的寄存器: + + 無 + +定義: + + 獲取之前通過 __ARM_NR_set_tls 系統調用設置的 TLS 值。 + +使用範例: + +typedef void * (__kuser_get_tls_t)(void); +#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0) + +void foo() +{ + void *tls = __kuser_get_tls(); + printf("TLS = %p\n", tls); +} + +注意: + + - 僅在 __kuser_helper_version >= 1 時,此輔助代碼存在 + (從內核版本 2.6.12 開始)。 + +kuser_cmpxchg +------------- + +位置: 0xffff0fc0 + +參考原型: + + int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr); + +輸入: + + r0 = oldval + r1 = newval + r2 = ptr + lr = 返回地址 + +輸出: + + r0 = 成功代碼 (零或非零) + C flag = 如果 r0 == 0 則置 1,如果 r0 != 0 則清零。 + +被篡改的寄存器: + + r3, ip, flags + +定義: + + 僅在 *ptr 爲 oldval 時原子保存 newval 於 *ptr 中。 + 如果 *ptr 被改變,則返回值爲零,否則爲非零值。 + 如果 *ptr 被改變,則 C flag 也會被置 1,以實現調用代碼中的彙編 + 優化。 + +使用範例: + +typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr); +#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0) + +int atomic_add(volatile int *ptr, int val) +{ + int old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg(old, new, ptr)); + + return new; +} + +注意: + + - 這個例程已根據需要包含了內存屏障。 + + - 僅在 __kuser_helper_version >= 2 時,此輔助代碼存在 + (從內核版本 2.6.12 開始)。 + +kuser_memory_barrier +-------------------- + +位置: 0xffff0fa0 + +參考原型: + + void __kuser_memory_barrier(void); + +輸入: + + lr = 返回地址 + +輸出: + + 無 + +被篡改的寄存器: + + 無 + +定義: + + 應用於任何需要內存屏障以防止手動數據修改帶來的一致性問題,以及 + __kuser_cmpxchg 中。 + +使用範例: + +typedef void (__kuser_dmb_t)(void); +#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0) + +注意: + + - 僅在 __kuser_helper_version >= 3 時,此輔助代碼存在 + (從內核版本 2.6.15 開始)。 + +kuser_cmpxchg64 +--------------- + +位置: 0xffff0f60 + +參考原型: + + int __kuser_cmpxchg64(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); + +輸入: + + r0 = 指向 oldval + r1 = 指向 newval + r2 = 指向目標值 + lr = 返回地址 + +輸出: + + r0 = 成功代碼 (零或非零) + C flag = 如果 r0 == 0 則置 1,如果 r0 != 0 則清零。 + +被篡改的寄存器: + + r3, lr, flags + +定義: + + 僅在 *ptr 等於 *oldval 指向的 64 位值時,原子保存 
*newval + 指向的 64 位值於 *ptr 中。如果 *ptr 被改變,則返回值爲零, + 否則爲非零值。 + + 如果 *ptr 被改變,則 C flag 也會被置 1,以實現調用代碼中的彙編 + 優化。 + +使用範例: + +typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval, + const int64_t *newval, + volatile int64_t *ptr); +#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60) + +int64_t atomic_add64(volatile int64_t *ptr, int64_t val) +{ + int64_t old, new; + + do { + old = *ptr; + new = old + val; + } while(__kuser_cmpxchg64(&old, &new, ptr)); + + return new; +} + +注意: + + - 這個例程已根據需要包含了內存屏障。 + + - 由於這個過程的代碼長度(此輔助代碼跨越 2 個常規的 kuser “槽”), + 因此 0xffff0f80 不被作爲有效的入口點。 + + - 僅在 __kuser_helper_version >= 5 時,此輔助代碼存在 + (從內核版本 3.1 開始)。 + diff --git a/Documentation/translations/zh_TW/arch/arm64/amu.rst b/Documentation/translations/zh_TW/arch/arm64/amu.rst index 21ac0db638..1b451eae2b 100644 --- a/Documentation/translations/zh_TW/arch/arm64/amu.rst +++ b/Documentation/translations/zh_TW/arch/arm64/amu.rst @@ -28,11 +28,11 @@ AArch64 Linux 中擴展的活動監控單元 AMUv1 架構實現了一個由4個固定的64位事件計數器組成的計數器組。 - - CPU 周期計數器:同 CPU 的頻率增長 + - CPU 週期計數器:同 CPU 的頻率增長 - 常量計數器:同固定的系統時鐘頻率增長 - 淘汰指令計數器: 同每次架構指令執行增長 - - 內存停頓周期計數器:計算由在時鐘域內的最後一級緩存中未命中而引起 - 的指令調度停頓周期數 + - 內存停頓週期計數器:計算由在時鐘域內的最後一級緩存中未命中而引起 + 的指令調度停頓週期數 當處於 WFI 或者 WFE 狀態時,計數器不會增長。 diff --git a/Documentation/translations/zh_TW/arch/arm64/booting.txt b/Documentation/translations/zh_TW/arch/arm64/booting.txt index 3cc8f593e0..be0de91ece 100644 --- a/Documentation/translations/zh_TW/arch/arm64/booting.txt +++ b/Documentation/translations/zh_TW/arch/arm64/booting.txt @@ -41,8 +41,8 @@ AArch64 異常模型由多個異常級(EL0 - EL3)組成,對於 EL0 和 EL1 有對應的安全和非安全模式。EL2 是系統管理級,且僅存在於非安全模式下。 EL3 是最高特權級,且僅存在於安全模式下。 -基於本文檔的目的,我們將簡單地使用『引導裝載程序』(『boot loader』) -這個術語來定義在將控制權交給 Linux 內核前 CPU 上執行的所有軟體。 +基於本文檔的目的,我們將簡單地使用‘引導裝載程序’(‘boot loader’) +這個術語來定義在將控制權交給 Linux 內核前 CPU 上執行的所有軟件。 這可能包含安全監控和系統管理代碼,或者它可能只是一些用於準備最小啓動 環境的指令。 @@ -74,7 +74,7 @@ RAM,或可能使用對這個設備已知的 RAM 信息,還可能是引導裝 數據塊將在使能緩存的情況下以 2MB 粒度被映射,故其不能被置於必須以特定 屬性映射的2M區域內。 -註: v4.2 之前的版本同時要求設備樹數據塊被置於從內核映像以下 +注: v4.2 
之前的版本同時要求設備樹數據塊被置於從內核映像以下 text_offset 字節處算起第一個 512MB 內。 3、解壓內核映像 @@ -106,7 +106,7 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 u32 res5; /* 保留 (用於 PE COFF 偏移) */ -映像頭注釋: +映像頭註釋: - 自 v3.17 起,除非另有說明,所有域都是小端模式。 @@ -143,7 +143,7 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 字節處,並從該處被調用。2MB 對齊基址和內核映像起始地址之間的區域對於 內核來說沒有特殊意義,且可能被用於其他目的。 從映像起始地址算起,最少必須準備 image_size 字節的空閒內存供內核使用。 -註: v4.6 之前的版本無法使用內核映像物理偏移以下的內存,所以當時建議 +注: v4.6 之前的版本無法使用內核映像物理偏移以下的內存,所以當時建議 將映像儘量放置在靠近系統內存起始的地方。 任何提供給內核的內存(甚至在映像起始地址之前),若未從內核中標記爲保留 @@ -151,7 +151,7 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 在跳轉入內核前,必須符合以下狀態: -- 停止所有 DMA 設備,這樣內存數據就不會因爲虛假網絡包或磁碟數據而 +- 停止所有 DMA 設備,這樣內存數據就不會因爲虛假網絡包或磁盤數據而 被破壞。這可能可以節省你許多的調試時間。 - 主 CPU 通用寄存器設置 @@ -175,7 +175,7 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 而不通過虛擬地址操作維護構架緩存的系統緩存(不推薦),必須被配置且 禁用。 - *譯者註:對於 PoC 以及緩存相關內容,請參考 ARMv8 構架參考手冊 + *譯者注:對於 PoC 以及緩存相關內容,請參考 ARMv8 構架參考手冊 ARM DDI 0487A - 架構計時器 @@ -189,7 +189,7 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 接收。 - 系統寄存器 - 在進入內核映像的異常級中,所有構架中可寫的系統寄存器必須通過軟體 + 在進入內核映像的異常級中,所有構架中可寫的系統寄存器必須通過軟件 在一個更高的異常級別下初始化,以防止在 未知 狀態下運行。 對於擁有 GICv3 中斷控制器並以 v3 模式運行的系統: @@ -214,14 +214,14 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 引導裝載程序必須在每個 CPU 處於以下狀態時跳入內核入口: - 主 CPU 必須直接跳入內核映像的第一條指令。通過此 CPU 傳遞的設備樹 - 數據塊必須在每個 CPU 節點中包含一個 『enable-method』 屬性,所 + 數據塊必須在每個 CPU 節點中包含一個 ‘enable-method’ 屬性,所 支持的 enable-method 請見下文。 引導裝載程序必須生成這些設備樹屬性,並在跳入內核入口之前將其插入 數據塊。 -- enable-method 爲 「spin-table」 的 CPU 必須在它們的 CPU - 節點中包含一個 『cpu-release-addr』 屬性。這個屬性標識了一個 +- enable-method 爲 “spin-table” 的 CPU 必須在它們的 CPU + 節點中包含一個 ‘cpu-release-addr’ 屬性。這個屬性標識了一個 64 位自然對齊且初始化爲零的內存位置。 這些 CPU 必須在內存保留區(通過設備樹中的 /memreserve/ 域傳遞 @@ -231,15 +231,15 @@ AArch64 內核當前沒有提供自解壓代碼,因此如果使用了壓縮內 時,CPU 必須跳入此值所指向的地址。此值爲一個單獨的 64 位小端值, 因此 CPU 須在跳轉前將所讀取的值轉換爲其本身的端模式。 -- enable-method 爲 「psci」 的 CPU 保持在內核外(比如,在 +- enable-method 爲 “psci” 的 CPU 保持在內核外(比如,在 memory 節點中描述爲內核空間的內存區外,或在通過設備樹 /memreserve/ 域中描述爲內核保留區的空間中)。內核將會發起在 ARM 文檔(編號 - ARM DEN 0022A:用於 ARM 上的電源狀態協調接口系統軟體)中描述的 + ARM DEN 0022A:用於 ARM 上的電源狀態協調接口系統軟件)中描述的 CPU_ON 調用來將 CPU 帶入內核。 *譯者注: ARM DEN 0022A 
已更新到 ARM DEN 0022C。 - 設備樹必須包含一個 『psci』 節點,請參考以下文檔: + 設備樹必須包含一個 ‘psci’ 節點,請參考以下文檔: Documentation/devicetree/bindings/arm/psci.yaml diff --git a/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst b/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst index ca7ff749a6..d2c1c2f238 100644 --- a/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst +++ b/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst @@ -17,11 +17,11 @@ ARM64 ELF hwcaps 1. 簡介 ------- -有些硬體或軟體功能僅在某些 CPU 實現上和/或在具體某個內核配置上可用,但 +有些硬件或軟件功能僅在某些 CPU 實現上和/或在具體某個內核配置上可用,但 對於處於 EL0 的用戶空間代碼沒有可用的架構發現機制。內核通過在輔助向量表 公開一組稱爲 hwcaps 的標誌而把這些功能暴露給用戶空間。 -用戶空間軟體可以通過獲取輔助向量的 AT_HWCAP 或 AT_HWCAP2 條目來測試功能, +用戶空間軟件可以通過獲取輔助向量的 AT_HWCAP 或 AT_HWCAP2 條目來測試功能, 並測試是否設置了相關標誌,例如:: bool floating_point_is_present(void) @@ -33,7 +33,7 @@ ARM64 ELF hwcaps return false; } -如果軟體依賴於 hwcap 描述的功能,在嘗試使用該功能前則應檢查相關的 hwcap +如果軟件依賴於 hwcap 描述的功能,在嘗試使用該功能前則應檢查相關的 hwcap 標誌以驗證該功能是否存在。 不能通過其他方式探查這些功能。當一個功能不可用時,嘗試使用它可能導致不可 @@ -44,8 +44,8 @@ ARM64 ELF hwcaps ---------------- 大多數 hwcaps 旨在說明通過架構 ID 寄存器(處於 EL0 的用戶空間代碼無法訪問) -描述的功能的存在。這些 hwcap 通過 ID 寄存器欄位定義,並且應根據 ARM 體系 -結構參考手冊(ARM ARM)中定義的欄位來解釋說明。 +描述的功能的存在。這些 hwcap 通過 ID 寄存器字段定義,並且應根據 ARM 體系 +結構參考手冊(ARM ARM)中定義的字段來解釋說明。 這些 hwcaps 以下面的形式描述:: diff --git a/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt b/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt index c2d02cd501..7d1f0593d7 100644 --- a/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt +++ b/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt @@ -31,7 +31,7 @@ Documentation/arch/arm64/legacy_instructions.rst 的中文翻譯 以下爲正文 --------------------------------------------------------------------- Linux 內核在 arm64 上的移植提供了一個基礎框架,以支持構架中正在被淘汰或已廢棄指令的模擬執行。 -這個基礎框架的代碼使用未定義指令鉤子(hooks)來支持模擬。如果指令存在,它也允許在硬體中啓用該指令。 +這個基礎框架的代碼使用未定義指令鉤子(hooks)來支持模擬。如果指令存在,它也允許在硬件中啓用該指令。 模擬模式可通過寫 sysctl 節點(/proc/sys/abi)來控制。 不同的執行方式及 sysctl 節點的相應值,解釋如下: @@ -42,18 +42,18 @@ Linux 內核在 arm64 
上的移植提供了一個基礎框架,以支持構架 * Emulate(模擬) 值: 1 - 使用軟體模擬方式。爲解決軟體遷移問題,這種模擬指令模式的使用是被跟蹤的,並會發出速率限制警告。 + 使用軟件模擬方式。爲解決軟件遷移問題,這種模擬指令模式的使用是被跟蹤的,並會發出速率限制警告。 它是那些構架中正在被淘汰的指令,如 CP15 barriers(隔離指令),的默認處理方式。 -* Hardware Execution(硬體執行) +* Hardware Execution(硬件執行) 值: 2 - 雖然標記爲正在被淘汰,但一些實現可能提供硬體執行這些指令的使能/禁用操作。 - 使用硬體執行一般會有更好的性能,但將無法收集運行時對正被淘汰指令的使用統計數據。 + 雖然標記爲正在被淘汰,但一些實現可能提供硬件執行這些指令的使能/禁用操作。 + 使用硬件執行一般會有更好的性能,但將無法收集運行時對正被淘汰指令的使用統計數據。 默認執行模式依賴於指令在構架中狀態。正在被淘汰的指令應該以模擬(Emulate)作爲默認模式, 而已廢棄的指令必須默認使用未定義(Undef)模式 -注意:指令模擬可能無法應對所有情況。更多詳情請參考單獨的指令注釋。 +注意:指令模擬可能無法應對所有情況。更多詳情請參考單獨的指令註釋。 受支持的遺留指令 ------------- @@ -71,7 +71,7 @@ Linux 內核在 arm64 上的移植提供了一個基礎框架,以支持構架 節點: /proc/sys/abi/setend 狀態: 正被淘汰,不推薦使用 默認執行方式: Emulate (1)* -註:爲了使能這個特性,系統中的所有 CPU 必須在 EL0 支持混合字節序。 +注:爲了使能這個特性,系統中的所有 CPU 必須在 EL0 支持混合字節序。 如果一個新的 CPU (不支持混合字節序) 在使能這個特性後被熱插入系統, 在應用中可能會出現不可預期的結果。 diff --git a/Documentation/translations/zh_TW/arch/arm64/memory.txt b/Documentation/translations/zh_TW/arch/arm64/memory.txt index 0280200e79..e41c518e71 100644 --- a/Documentation/translations/zh_TW/arch/arm64/memory.txt +++ b/Documentation/translations/zh_TW/arch/arm64/memory.txt @@ -28,17 +28,17 @@ Documentation/arch/arm64/memory.rst 的中文翻譯 以下爲正文 --------------------------------------------------------------------- - Linux 在 AArch64 中的內存布局 + Linux 在 AArch64 中的內存佈局 =========================== 作者: Catalin Marinas -本文檔描述 AArch64 Linux 內核所使用的虛擬內存布局。此構架可以實現 +本文檔描述 AArch64 Linux 內核所使用的虛擬內存佈局。此構架可以實現 頁大小爲 4KB 的 4 級轉換表和頁大小爲 64KB 的 3 級轉換表。 AArch64 Linux 使用 3 級或 4 級轉換表,其頁大小配置爲 4KB,對於用戶和內核 分別都有 39-bit (512GB) 或 48-bit (256TB) 的虛擬地址空間。 -對於頁大小爲 64KB的配置,僅使用 2 級轉換表,有 42-bit (4TB) 的虛擬地址空間,但內存布局相同。 +對於頁大小爲 64KB的配置,僅使用 2 級轉換表,有 42-bit (4TB) 的虛擬地址空間,但內存佈局相同。 用戶地址空間的 63:48 位爲 0,而內核地址空間的相應位爲 1。TTBRx 的 選擇由虛擬地址的 63 位給出。swapper_pg_dir 僅包含內核(全局)映射, @@ -46,7 +46,7 @@ AArch64 Linux 使用 3 級或 4 級轉換表,其頁大小配置爲 4KB,對 TTBR1 中,且從不寫入 TTBR0。 -AArch64 Linux 在頁大小爲 4KB,並使用 3 級轉換表時的內存布局: +AArch64 Linux 在頁大小爲 4KB,並使用 3 級轉換表時的內存佈局: 起始地址 結束地址 大小 用途 
----------------------------------------------------------------------- @@ -54,7 +54,7 @@ AArch64 Linux 在頁大小爲 4KB,並使用 3 級轉換表時的內存布局 ffffff8000000000 ffffffffffffffff 512GB 內核空間 -AArch64 Linux 在頁大小爲 4KB,並使用 4 級轉換表時的內存布局: +AArch64 Linux 在頁大小爲 4KB,並使用 4 級轉換表時的內存佈局: 起始地址 結束地址 大小 用途 ----------------------------------------------------------------------- @@ -62,7 +62,7 @@ AArch64 Linux 在頁大小爲 4KB,並使用 4 級轉換表時的內存布局 ffff000000000000 ffffffffffffffff 256TB 內核空間 -AArch64 Linux 在頁大小爲 64KB,並使用 2 級轉換表時的內存布局: +AArch64 Linux 在頁大小爲 64KB,並使用 2 級轉換表時的內存佈局: 起始地址 結束地址 大小 用途 ----------------------------------------------------------------------- @@ -70,7 +70,7 @@ AArch64 Linux 在頁大小爲 64KB,並使用 2 級轉換表時的內存布局 fffffc0000000000 ffffffffffffffff 4TB 內核空間 -AArch64 Linux 在頁大小爲 64KB,並使用 3 級轉換表時的內存布局: +AArch64 Linux 在頁大小爲 64KB,並使用 3 級轉換表時的內存佈局: 起始地址 結束地址 大小 用途 ----------------------------------------------------------------------- @@ -78,7 +78,7 @@ AArch64 Linux 在頁大小爲 64KB,並使用 3 級轉換表時的內存布局 ffff000000000000 ffffffffffffffff 256TB 內核空間 -更詳細的內核虛擬內存布局,請參閱內核啓動信息。 +更詳細的內核虛擬內存佈局,請參閱內核啓動信息。 4KB 頁大小的轉換表查找: diff --git a/Documentation/translations/zh_TW/arch/arm64/perf.rst b/Documentation/translations/zh_TW/arch/arm64/perf.rst index 645f3944a0..405d5f6696 100644 --- a/Documentation/translations/zh_TW/arch/arm64/perf.rst +++ b/Documentation/translations/zh_TW/arch/arm64/perf.rst @@ -59,7 +59,7 @@ EL2(VHE 內核 或 non-VHE 虛擬機監控器)。 KVM 客戶機可能運行在 EL0(用戶空間)和 EL1(內核)。 -由於宿主機和客戶機之間重疊的異常級別,我們不能僅僅依靠 PMU 的硬體異 +由於宿主機和客戶機之間重疊的異常級別,我們不能僅僅依靠 PMU 的硬件異 常過濾機制-因此我們必須啓用/禁用對於客戶機進入和退出的計數。而這在 VHE 和 non-VHE 系統上表現不同。 diff --git a/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt b/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt index f6f41835a5..70371807ca 100644 --- a/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt +++ b/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt @@ -28,39 +28,39 @@ Documentation/arch/arm64/silicon-errata.rst 的中文翻譯 以下爲正文 
--------------------------------------------------------------------- - 晶片勘誤和軟體補救措施 + 芯片勘誤和軟件補救措施 ================== 作者: Will Deacon 日期: 2015年11月27日 -一個不幸的現實:硬體經常帶有一些所謂的「瑕疵(errata)」,導致其在 -某些特定情況下會違背構架定義的行爲。就基於 ARM 的硬體而言,這些瑕疵 +一個不幸的現實:硬件經常帶有一些所謂的“瑕疵(errata)”,導致其在 +某些特定情況下會違背構架定義的行爲。就基於 ARM 的硬件而言,這些瑕疵 大體可分爲以下幾類: A 類:無可行補救措施的嚴重缺陷。 B 類:有可接受的補救措施的重大或嚴重缺陷。 C 類:在正常操作中不會顯現的小瑕疵。 -更多資訊,請在 infocenter.arm.com (需註冊)中查閱「軟體開發者勘誤 -筆記」(「Software Developers Errata Notice」)文檔。 +更多資訊,請在 infocenter.arm.com (需註冊)中查閱“軟件開發者勘誤 +筆記”(“Software Developers Errata Notice”)文檔。 -對於 Linux 而言,B 類缺陷可能需要作業系統的某些特別處理。例如,避免 +對於 Linux 而言,B 類缺陷可能需要操作系統的某些特別處理。例如,避免 一個特殊的代碼序列,或是以一種特定的方式配置處理器。在某種不太常見的 情況下,爲將 A 類缺陷當作 C 類處理,可能需要用類似的手段。這些手段被 -統稱爲「軟體補救措施」,且僅在少數情況需要(例如,那些需要一個運行在 +統稱爲“軟件補救措施”,且僅在少數情況需要(例如,那些需要一個運行在 非安全異常級的補救措施 *並且* 能被 Linux 觸發的情況)。 -對於尚在討論中的可能對未受瑕疵影響的系統產生干擾的軟體補救措施,有一個 -相應的內核配置(Kconfig)選項被加在 「內核特性(Kernel Features)」-> -「基於可選方法框架的 ARM 瑕疵補救措施(ARM errata workarounds via +對於尚在討論中的可能對未受瑕疵影響的系統產生干擾的軟件補救措施,有一個 +相應的內核配置(Kconfig)選項被加在 “內核特性(Kernel Features)”-> +“基於可選方法框架的 ARM 瑕疵補救措施(ARM errata workarounds via the alternatives framework)"。這些選項被默認開啓,若探測到受影響的CPU, 補丁將在運行時被使用。至於對系統運行影響較小的補救措施,內核配置選項 -並不存在,且代碼以某種規避瑕疵的方式被構造(帶注釋爲宜)。 +並不存在,且代碼以某種規避瑕疵的方式被構造(帶註釋爲宜)。 -這種做法對於在任意內核原始碼樹中準確地判斷出哪個瑕疵已被軟體方法所補救 -稍微有點麻煩,所以在 Linux 內核中此文件作爲軟體補救措施的註冊表, -並將在新的軟體補救措施被提交和向後移植(backported)到穩定內核時被更新。 +這種做法對於在任意內核源代碼樹中準確地判斷出哪個瑕疵已被軟件方法所補救 +稍微有點麻煩,所以在 Linux 內核中此文件作爲軟件補救措施的註冊表, +並將在新的軟件補救措施被提交和向後移植(backported)到穩定內核時被更新。 | 實現者 | 受影響的組件 | 勘誤編號 | 內核配置 | +----------------+-----------------+-----------------+-------------------------+ diff --git a/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt b/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt index c0be1d1e0d..9812d99549 100644 --- a/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt +++ b/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt @@ -36,14 +36,14 @@ Documentation/arch/arm64/tagged-pointers.rst 的中文翻譯 AArch64 Linux 中的潛在用途。 
內核提供的地址轉換表配置使通過 TTBR0 完成的虛擬地址轉換(即用戶空間 -映射),其虛擬地址的最高 8 位(63:56)會被轉換硬體所忽略。這種機制 -讓這些位可供應用程式自由使用,其注意事項如下: +映射),其虛擬地址的最高 8 位(63:56)會被轉換硬件所忽略。這種機制 +讓這些位可供應用程序自由使用,其注意事項如下: (1) 內核要求所有傳遞到 EL1 的用戶空間地址帶有 0x00 標記。 - 這意味著任何攜帶用戶空間虛擬地址的系統調用(syscall) + 這意味着任何攜帶用戶空間虛擬地址的系統調用(syscall) 參數 *必須* 在陷入內核前使它們的最高字節被清零。 - (2) 非零標記在傳遞信號時不被保存。這意味著在應用程式中利用了 + (2) 非零標記在傳遞信號時不被保存。這意味着在應用程序中利用了 標記的信號處理函數無法依賴 siginfo_t 的用戶空間虛擬 地址所攜帶的包含其內部域信息的標記。此規則的一個例外是 當信號是在調試觀察點的異常處理程序中產生的,此時標記的 @@ -53,5 +53,5 @@ AArch64 Linux 中的潛在用途。 的高字節,C 編譯器很可能無法判斷它們是不同的。 此構架會阻止對帶標記的 PC 指針的利用,因此在異常返回時,其高字節 -將被設置成一個爲 「55」 的擴展符。 +將被設置成一個爲 “55” 的擴展符。 diff --git a/Documentation/translations/zh_TW/arch/index.rst b/Documentation/translations/zh_TW/arch/index.rst new file mode 100644 index 0000000000..7c04905894 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/index.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0 + +處理器體系結構 +============== + +以下文檔提供了具體架構實現的編程細節。 + +.. toctree:: + :maxdepth: 2 + + mips/index + arm64/index + openrisc/index + parisc/index + loongarch/index + +TODOList: + +* arm/index +* m68k/index +* nios2/index +* powerpc/index +* s390/index +* sh/index +* sparc/index +* x86/index +* xtensa/index +* ../riscv/index + diff --git a/Documentation/translations/zh_TW/arch/loongarch/booting.rst b/Documentation/translations/zh_TW/arch/loongarch/booting.rst new file mode 100644 index 0000000000..88291090ce --- /dev/null +++ b/Documentation/translations/zh_TW/arch/loongarch/booting.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. 
include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/loongarch/booting.rst + +:翻譯: + + 司延騰 Yanteng Si + +==================== +啓動 Linux/LoongArch +==================== + +:作者: 司延騰 +:日期: 2022年11月18日 + +BootLoader傳遞給內核的信息 +========================== + +LoongArch支持ACPI和FDT啓動,需要傳遞給內核的信息包括memmap、initrd、cmdline、可 +選的ACPI/FDT表等。 + +內核在 `kernel_entry` 入口處被傳遞以下參數: + + - a0 = efi_boot: `efi_boot` 是一個標誌,表示這個啓動環境是否完全符合UEFI + 的要求。 + + - a1 = cmdline: `cmdline` 是一個指向內核命令行的指針。 + + - a2 = systemtable: `systemtable` 指向EFI的系統表,在這個階段涉及的所有 + 指針都是物理地址。 + +Linux/LoongArch內核鏡像文件頭 +============================= + +內核鏡像是EFI鏡像。作爲PE文件,它們有一個64字節的頭部結構體,如下所示:: + + u32 MZ_MAGIC /* "MZ", MS-DOS 頭 */ + u32 res0 = 0 /* 保留 */ + u64 kernel_entry /* 內核入口點 */ + u64 _end - _text /* 內核鏡像有效大小 */ + u64 load_offset /* 加載內核鏡像相對內存起始地址的偏移量 */ + u64 res1 = 0 /* 保留 */ + u64 res2 = 0 /* 保留 */ + u64 res3 = 0 /* 保留 */ + u32 LINUX_PE_MAGIC /* 魔術數 */ + u32 pe_header - _head /* 到PE頭的偏移量 */ + diff --git a/Documentation/translations/zh_TW/arch/loongarch/features.rst b/Documentation/translations/zh_TW/arch/loongarch/features.rst new file mode 100644 index 0000000000..c2175fd32b --- /dev/null +++ b/Documentation/translations/zh_TW/arch/loongarch/features.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/loongarch/features.rst +:Translator: Huacai Chen + +.. kernel-feat:: features loongarch + diff --git a/Documentation/translations/zh_TW/arch/loongarch/index.rst b/Documentation/translations/zh_TW/arch/loongarch/index.rst new file mode 100644 index 0000000000..7281e050fe --- /dev/null +++ b/Documentation/translations/zh_TW/arch/loongarch/index.rst @@ -0,0 +1,28 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/loongarch/index.rst +:Translator: Huacai Chen + +================= +LoongArch體系結構 +================= + +.. 
toctree:: + :maxdepth: 2 + :numbered: + + introduction + booting + irq-chip-model + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` + diff --git a/Documentation/translations/zh_TW/arch/loongarch/introduction.rst b/Documentation/translations/zh_TW/arch/loongarch/introduction.rst new file mode 100644 index 0000000000..a5603f9b0a --- /dev/null +++ b/Documentation/translations/zh_TW/arch/loongarch/introduction.rst @@ -0,0 +1,354 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/loongarch/introduction.rst +:Translator: Huacai Chen + +============= +LoongArch介紹 +============= + +LoongArch是一種新的RISC ISA,在一定程度上類似於MIPS和RISC-V。LoongArch指令集 +包括一個精簡32位版(LA32R)、一個標準32位版(LA32S)、一個64位版(LA64)。 +LoongArch定義了四個特權級(PLV0~PLV3),其中PLV0是最高特權級,用於內核;而PLV3 +是最低特權級,用於應用程序。本文檔介紹了LoongArch的寄存器、基礎指令集、虛擬內 +存以及其他一些主題。 + +寄存器 +====== + +LoongArch的寄存器包括通用寄存器(GPRs)、浮點寄存器(FPRs)、向量寄存器(VRs) +和用於特權模式(PLV0)的控制狀態寄存器(CSRs)。 + +通用寄存器 +---------- + +LoongArch包括32個通用寄存器( ``$r0`` ~ ``$r31`` ),LA32中每個寄存器爲32位寬, +LA64中每個寄存器爲64位寬。 ``$r0`` 的內容總是固定爲0,而其他寄存器在體系結構層面 +沒有特殊功能。( ``$r1`` 算是一個例外,在BL指令中固定用作鏈接返回寄存器。) + +內核使用了一套LoongArch寄存器約定,定義在LoongArch ELF psABI規範中,詳細描述參見 +:ref:`參考文獻 `: + +================= =============== =================== ========== +寄存器名 別名 用途 跨調用保持 +================= =============== =================== ========== +``$r0`` ``$zero`` 常量0 不使用 +``$r1`` ``$ra`` 返回地址 否 +``$r2`` ``$tp`` TLS/線程信息指針 不使用 +``$r3`` ``$sp`` 棧指針 是 +``$r4``-``$r11`` ``$a0``-``$a7`` 參數寄存器 否 +``$r4``-``$r5`` ``$v0``-``$v1`` 返回值 否 +``$r12``-``$r20`` ``$t0``-``$t8`` 臨時寄存器 否 +``$r21`` ``$u0`` 每CPU變量基地址 不使用 +``$r22`` ``$fp`` 幀指針 是 +``$r23``-``$r31`` ``$s0``-``$s8`` 靜態寄存器 是 +================= =============== =================== ========== + +.. 
note:: + 注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux內核用於保 + 存每CPU變量基地址。該寄存器沒有ABI命名,不過在內核中稱爲 ``$u0`` 。在 + 一些遺留代碼中有時可能見到 ``$v0`` 和 ``$v1`` ,它們是 ``$a0`` 和 + ``$a1`` 的別名,屬於已經廢棄的用法。 + +浮點寄存器 +---------- + +當系統中存在FPU時,LoongArch有32個浮點寄存器( ``$f0`` ~ ``$f31`` )。在LA64 +的CPU核上,每個寄存器均爲64位寬。 + +浮點寄存器的使用約定與LoongArch ELF psABI規範的描述相同: + +================= ================== =================== ========== +寄存器名 別名 用途 跨調用保持 +================= ================== =================== ========== +``$f0``-``$f7`` ``$fa0``-``$fa7`` 參數寄存器 否 +``$f0``-``$f1`` ``$fv0``-``$fv1`` 返回值 否 +``$f8``-``$f23`` ``$ft0``-``$ft15`` 臨時寄存器 否 +``$f24``-``$f31`` ``$fs0``-``$fs7`` 靜態寄存器 是 +================= ================== =================== ========== + +.. note:: + 注意:在一些遺留代碼中有時可能見到 ``$fv0`` 和 ``$fv1`` ,它們是 + ``$fa0`` 和 ``$fa1`` 的別名,屬於已經廢棄的用法。 + + +向量寄存器 +---------- + +LoongArch現有兩種向量擴展: + +- 128位向量擴展LSX(全稱Loongson SIMD eXtention), +- 256位向量擴展LASX(全稱Loongson Advanced SIMD eXtention)。 + +LSX使用 ``$v0`` ~ ``$v31`` 向量寄存器,而LASX則使用 ``$x0`` ~ ``$x31`` 。 + +浮點寄存器和向量寄存器是複用的,比如:在一個實現了LSX和LASX的核上, ``$x0`` 的 +低128位與 ``$v0`` 共用, ``$v0`` 的低64位與 ``$f0`` 共用,其他寄存器依此類推。 + +控制狀態寄存器 +-------------- + +控制狀態寄存器只能在特權模式(PLV0)下訪問: + +================= ==================================== ========== +地址 全稱描述 簡稱 +================= ==================================== ========== +0x0 當前模式信息 CRMD +0x1 異常前模式信息 PRMD +0x2 擴展部件使能 EUEN +0x3 雜項控制 MISC +0x4 異常配置 ECFG +0x5 異常狀態 ESTAT +0x6 異常返回地址 ERA +0x7 出錯(Faulting)虛擬地址 BADV +0x8 出錯(Faulting)指令字 BADI +0xC 異常入口地址 EENTRY +0x10 TLB索引 TLBIDX +0x11 TLB表項高位 TLBEHI +0x12 TLB表項低位0 TLBELO0 +0x13 TLB表項低位1 TLBELO1 +0x18 地址空間標識符 ASID +0x19 低半地址空間頁全局目錄基址 PGDL +0x1A 高半地址空間頁全局目錄基址 PGDH +0x1B 頁全局目錄基址 PGD +0x1C 頁表遍歷控制低半部分 PWCL +0x1D 頁表遍歷控制高半部分 PWCH +0x1E STLB頁大小 STLBPS +0x1F 縮減虛地址配置 RVACFG +0x20 CPU編號 CPUID +0x21 特權資源配置信息1 PRCFG1 +0x22 特權資源配置信息2 PRCFG2 +0x23 特權資源配置信息3 PRCFG3 +0x30+n (0≤n≤15) 數據保存寄存器 SAVEn +0x40 定時器編號 TID +0x41 定時器配置 TCFG +0x42 定時器值 TVAL +0x43 計時器補償 CNTC +0x44 定時器中斷清除 TICLR +0x60 LLBit相關控制 
LLBCTL +0x80 實現相關控制1 IMPCTL1 +0x81 實現相關控制2 IMPCTL2 +0x88 TLB重填異常入口地址 TLBRENTRY +0x89 TLB重填異常出錯(Faulting)虛地址 TLBRBADV +0x8A TLB重填異常返回地址 TLBRERA +0x8B TLB重填異常數據保存 TLBRSAVE +0x8C TLB重填異常表項低位0 TLBRELO0 +0x8D TLB重填異常表項低位1 TLBRELO1 +0x8E TLB重填異常表項高位 TLBREHI +0x8F TLB重填異常前模式信息 TLBRPRMD +0x90 機器錯誤控制 MERRCTL +0x91 機器錯誤信息1 MERRINFO1 +0x92 機器錯誤信息2 MERRINFO2 +0x93 機器錯誤異常入口地址 MERRENTRY +0x94 機器錯誤異常返回地址 MERRERA +0x95 機器錯誤異常數據保存 MERRSAVE +0x98 高速緩存標籤 CTAG +0x180+n (0≤n≤3) 直接映射配置窗口n DMWn +0x200+2n (0≤n≤31) 性能監測配置n PMCFGn +0x201+2n (0≤n≤31) 性能監測計數器n PMCNTn +0x300 內存讀寫監視點整體控制 MWPC +0x301 內存讀寫監視點整體狀態 MWPS +0x310+8n (0≤n≤7) 內存讀寫監視點n配置1 MWPnCFG1 +0x311+8n (0≤n≤7) 內存讀寫監視點n配置2 MWPnCFG2 +0x312+8n (0≤n≤7) 內存讀寫監視點n配置3 MWPnCFG3 +0x313+8n (0≤n≤7) 內存讀寫監視點n配置4 MWPnCFG4 +0x380 取指監視點整體控制 FWPC +0x381 取指監視點整體狀態 FWPS +0x390+8n (0≤n≤7) 取指監視點n配置1 FWPnCFG1 +0x391+8n (0≤n≤7) 取指監視點n配置2 FWPnCFG2 +0x392+8n (0≤n≤7) 取指監視點n配置3 FWPnCFG3 +0x393+8n (0≤n≤7) 取指監視點n配置4 FWPnCFG4 +0x500 調試寄存器 DBG +0x501 調試異常返回地址 DERA +0x502 調試數據保存 DSAVE +================= ==================================== ========== + +ERA,TLBRERA,MERRERA和DERA有時也分別稱爲EPC,TLBREPC,MERREPC和DEPC。 + +基礎指令集 +========== + +指令格式 +-------- + +LoongArch的指令字長爲32位,一共有9種基本指令格式(以及一些變體): + +=========== ========================== +格式名稱 指令構成 +=========== ========================== +2R Opcode + Rj + Rd +3R Opcode + Rk + Rj + Rd +4R Opcode + Ra + Rk + Rj + Rd +2RI8 Opcode + I8 + Rj + Rd +2RI12 Opcode + I12 + Rj + Rd +2RI14 Opcode + I14 + Rj + Rd +2RI16 Opcode + I16 + Rj + Rd +1RI21 Opcode + I21L + Rj + I21H +I26 Opcode + I26L + I26H +=========== ========================== + +Opcode是指令操作碼,Rj和Rk是源操作數(寄存器),Rd是目標操作數(寄存器),Ra是 +4R-type格式特有的附加操作數(寄存器)。I8/I12/I14/I16/I21/I26分別是8位/12位/14位/ +16位/21位/26位的立即數。其中較長的21位和26位立即數在指令字中被分割爲高位部分與低位 +部分,所以你們在這裏的格式描述中能夠看到I21L/I21H和I26L/I26H這樣帶後綴的表述。 + +指令列表 +-------- + +爲了簡便起見,我們在此只羅列一下指令名稱(助記符),需要詳細信息請閱讀 +:ref:`參考文獻 ` 中的文檔。 + +1.
算術運算指令:: + + ADD.W SUB.W ADDI.W ADD.D SUB.D ADDI.D + SLT SLTU SLTI SLTUI + AND OR NOR XOR ANDN ORN ANDI ORI XORI + MUL.W MULH.W MULH.WU DIV.W DIV.WU MOD.W MOD.WU + MUL.D MULH.D MULH.DU DIV.D DIV.DU MOD.D MOD.DU + PCADDI PCADDU12I PCADDU18I + LU12I.W LU32I.D LU52I.D ADDU16I.D + +2. 移位運算指令:: + + SLL.W SRL.W SRA.W ROTR.W SLLI.W SRLI.W SRAI.W ROTRI.W + SLL.D SRL.D SRA.D ROTR.D SLLI.D SRLI.D SRAI.D ROTRI.D + +3. 位域操作指令:: + + EXT.W.B EXT.W.H CLO.W CLO.D CLZ.W CLZ.D CTO.W CTO.D CTZ.W CTZ.D + BYTEPICK.W BYTEPICK.D BSTRINS.W BSTRINS.D BSTRPICK.W BSTRPICK.D + REVB.2H REVB.4H REVB.2W REVB.D REVH.2W REVH.D BITREV.4B BITREV.8B BITREV.W BITREV.D + MASKEQZ MASKNEZ + +4. 分支轉移指令:: + + BEQ BNE BLT BGE BLTU BGEU BEQZ BNEZ B BL JIRL + +5. 訪存讀寫指令:: + + LD.B LD.BU LD.H LD.HU LD.W LD.WU LD.D ST.B ST.H ST.W ST.D + LDX.B LDX.BU LDX.H LDX.HU LDX.W LDX.WU LDX.D STX.B STX.H STX.W STX.D + LDPTR.W LDPTR.D STPTR.W STPTR.D + PRELD PRELDX + +6. 原子操作指令:: + + LL.W SC.W LL.D SC.D + AMSWAP.W AMSWAP.D AMADD.W AMADD.D AMAND.W AMAND.D AMOR.W AMOR.D AMXOR.W AMXOR.D + AMMAX.W AMMAX.D AMMIN.W AMMIN.D + +7. 柵障指令:: + + IBAR DBAR + +8. 特殊指令:: + + SYSCALL BREAK CPUCFG NOP IDLE ERTN(ERET) DBCL(DBGCALL) RDTIMEL.W RDTIMEH.W RDTIME.D + ASRTLE.D ASRTGT.D + +9.
特權指令:: + + CSRRD CSRWR CSRXCHG + IOCSRRD.B IOCSRRD.H IOCSRRD.W IOCSRRD.D IOCSRWR.B IOCSRWR.H IOCSRWR.W IOCSRWR.D + CACOP TLBP(TLBSRCH) TLBRD TLBWR TLBFILL TLBCLR TLBFLUSH INVTLB LDDIR LDPTE + +虛擬內存 +======== + +LoongArch可以使用直接映射虛擬內存和分頁映射虛擬內存。 + +直接映射虛擬內存通過CSR.DMWn(n=0~3)來進行配置,虛擬地址(VA)和物理地址(PA) +之間有簡單的映射關係:: + + VA = PA + 固定偏移 + +分頁映射的虛擬地址(VA)和物理地址(PA)有任意的映射關係,這種關係記錄在TLB和頁 +表中。LoongArch的TLB包括一個全相聯的MTLB(Multiple Page Size TLB,多樣頁大小TLB) +和一個組相聯的STLB(Single Page Size TLB,單一頁大小TLB)。 + +缺省狀態下,LA32的整個虛擬地址空間配置如下: + +============ =========================== =========================== +區段名 地址範圍 屬性 +============ =========================== =========================== +``UVRANGE`` ``0x00000000 - 0x7FFFFFFF`` 分頁映射, 可緩存, PLV0~3 +``KPRANGE0`` ``0x80000000 - 0x9FFFFFFF`` 直接映射, 非緩存, PLV0 +``KPRANGE1`` ``0xA0000000 - 0xBFFFFFFF`` 直接映射, 可緩存, PLV0 +``KVRANGE`` ``0xC0000000 - 0xFFFFFFFF`` 分頁映射, 可緩存, PLV0 +============ =========================== =========================== + +用戶態(PLV3)只能訪問UVRANGE,對於直接映射的KPRANGE0和KPRANGE1,將虛擬地址的第 +30~31位清零就等於物理地址。例如:物理地址0x00001000對應的非緩存直接映射虛擬地址 +是0x80001000,而其可緩存直接映射虛擬地址是0xA0001000。 + +缺省狀態下,LA64的整個虛擬地址空間配置如下: + +============ ====================== ================================== +區段名 地址範圍 屬性 +============ ====================== ================================== +``XUVRANGE`` ``0x0000000000000000 - 分頁映射, 可緩存, PLV0~3 + 0x3FFFFFFFFFFFFFFF`` +``XSPRANGE`` ``0x4000000000000000 - 直接映射, 可緩存 / 非緩存, PLV0 + 0x7FFFFFFFFFFFFFFF`` +``XKPRANGE`` ``0x8000000000000000 - 直接映射, 可緩存 / 非緩存, PLV0 + 0xBFFFFFFFFFFFFFFF`` +``XKVRANGE`` ``0xC000000000000000 - 分頁映射, 可緩存, PLV0 + 0xFFFFFFFFFFFFFFFF`` +============ ====================== ================================== + +用戶態(PLV3)只能訪問XUVRANGE,對於直接映射的XSPRANGE和XKPRANGE,將虛擬地址的第 +60~63位清零就等於物理地址,而其緩存屬性是通過虛擬地址的第60~61位配置的(0表示強序 +非緩存,1表示一致可緩存,2表示弱序非緩存)。 + +目前,我們僅用XKPRANGE來進行直接映射,XSPRANGE保留給以後用。 + +此處給出一個直接映射的例子:物理地址0x00000000_00001000的強序非緩存直接映射虛擬地址 +(在XKPRANGE中)是0x80000000_00001000,其一致可緩存直接映射虛擬地址(在XKPRANGE中) 
+是0x90000000_00001000,而其弱序非緩存直接映射虛擬地址(在XKPRANGE中)是0xA0000000_ +00001000。 + +Loongson與LoongArch的關係 +========================= + +LoongArch是一種RISC指令集架構(ISA),不同於現存的任何一種ISA,而Loongson(即龍 +芯)是一個處理器家族。龍芯包括三個系列:Loongson-1(龍芯1號)是32位處理器系列, +Loongson-2(龍芯2號)是低端64位處理器系列,而Loongson-3(龍芯3號)是高端64位處理 +器系列。舊的龍芯處理器基於MIPS架構,而新的龍芯處理器基於LoongArch架構。以龍芯3號 +爲例:龍芯3A1000/3B1500/3A2000/3A3000/3A4000都是兼容MIPS的,而龍芯3A5000(以及將 +來的型號)都是基於LoongArch的。 + +.. _loongarch-references-zh_TW: + +參考文獻 +======== + +Loongson官方網站(龍芯中科技術股份有限公司): + + http://www.loongson.cn/ + +Loongson與LoongArch的開發者網站(軟件與文檔資源): + + http://www.loongnix.cn/ + + https://github.com/loongson/ + + https://loongson.github.io/LoongArch-Documentation/ + +LoongArch指令集架構的文檔: + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版) + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版) + +LoongArch的ELF psABI文檔: + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-CN.pdf (中文版) + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-ELF-ABI-v2.01-EN.pdf (英文版) + +Loongson與LoongArch的Linux內核源碼倉庫: + + https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git + diff --git a/Documentation/translations/zh_TW/arch/loongarch/irq-chip-model.rst b/Documentation/translations/zh_TW/arch/loongarch/irq-chip-model.rst new file mode 100644 index 0000000000..dbe9595bbf --- /dev/null +++ b/Documentation/translations/zh_TW/arch/loongarch/irq-chip-model.rst @@ -0,0 +1,158 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. 
include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/loongarch/irq-chip-model.rst +:Translator: Huacai Chen + +================================== +LoongArch的IRQ芯片模型(層級關係) +================================== + +目前,基於LoongArch的處理器(如龍芯3A5000)只能與LS7A芯片組配合工作。LoongArch計算機 +中的中斷控制器(即IRQ芯片)包括CPUINTC(CPU Core Interrupt Controller)、LIOINTC( +Legacy I/O Interrupt Controller)、EIOINTC(Extended I/O Interrupt Controller)、 +HTVECINTC(Hyper-Transport Vector Interrupt Controller)、PCH-PIC(LS7A芯片組的主中 +斷控制器)、PCH-LPC(LS7A芯片組的LPC中斷控制器)和PCH-MSI(MSI中斷控制器)。 + +CPUINTC是一種CPU內部的每個核本地的中斷控制器,LIOINTC/EIOINTC/HTVECINTC是CPU內部的 +全局中斷控制器(每個芯片一個,所有核共享),而PCH-PIC/PCH-LPC/PCH-MSI是CPU外部的中 +斷控制器(在配套芯片組裏面)。這些中斷控制器(或者說IRQ芯片)以一種層次樹的組織形式 +級聯在一起,一共有兩種層級關係模型(傳統IRQ模型和擴展IRQ模型)。 + +傳統IRQ模型 +=========== + +在這種模型裏面,IPI(Inter-Processor Interrupt)和CPU本地時鐘中斷直接發送到CPUINTC, +CPU串口(UARTs)中斷發送到LIOINTC,而其他所有設備的中斷則分別發送到所連接的PCH-PIC/ +PCH-LPC/PCH-MSI,然後被HTVECINTC統一收集,再發送到LIOINTC,最後到達CPUINTC:: + + +-----+ +---------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +---------+ +-------+ + ^ + | + +---------+ +-------+ + | LIOINTC | <-- | UARTs | + +---------+ +-------+ + ^ + | + +-----------+ + | HTVECINTC | + +-----------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + | | | + +---------+ +---------+ +---------+ + | PCH-LPC | | Devices | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + +擴展IRQ模型 +=========== + +在這種模型裏面,IPI(Inter-Processor Interrupt)和CPU本地時鐘中斷直接發送到CPUINTC, +CPU串口(UARTs)中斷發送到LIOINTC,而其他所有設備的中斷則分別發送到所連接的PCH-PIC/ +PCH-LPC/PCH-MSI,然後被EIOINTC統一收集,再直接到達CPUINTC:: + + +-----+ +---------+ +-------+ + | IPI | --> | CPUINTC | <-- | Timer | + +-----+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ +-------+ + | EIOINTC | | LIOINTC | <-- | UARTs | + +---------+ +---------+ +-------+ + ^ ^ + | | + +---------+ +---------+ + | PCH-PIC | | PCH-MSI | + +---------+ +---------+ + ^ ^ ^ + |
| | + +---------+ +---------+ +---------+ + | PCH-LPC | | Devices | | Devices | + +---------+ +---------+ +---------+ + ^ + | + +---------+ + | Devices | + +---------+ + +ACPI相關的定義 +============== + +CPUINTC:: + + ACPI_MADT_TYPE_CORE_PIC; + struct acpi_madt_core_pic; + enum acpi_madt_core_pic_version; + +LIOINTC:: + + ACPI_MADT_TYPE_LIO_PIC; + struct acpi_madt_lio_pic; + enum acpi_madt_lio_pic_version; + +EIOINTC:: + + ACPI_MADT_TYPE_EIO_PIC; + struct acpi_madt_eio_pic; + enum acpi_madt_eio_pic_version; + +HTVECINTC:: + + ACPI_MADT_TYPE_HT_PIC; + struct acpi_madt_ht_pic; + enum acpi_madt_ht_pic_version; + +PCH-PIC:: + + ACPI_MADT_TYPE_BIO_PIC; + struct acpi_madt_bio_pic; + enum acpi_madt_bio_pic_version; + +PCH-MSI:: + + ACPI_MADT_TYPE_MSI_PIC; + struct acpi_madt_msi_pic; + enum acpi_madt_msi_pic_version; + +PCH-LPC:: + + ACPI_MADT_TYPE_LPC_PIC; + struct acpi_madt_lpc_pic; + enum acpi_madt_lpc_pic_version; + +參考文獻 +======== + +龍芯3A5000的文檔: + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-3A5000-usermanual-1.02-CN.pdf (中文版) + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-3A5000-usermanual-1.02-EN.pdf (英文版) + +龍芯LS7A芯片組的文檔: + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-CN.pdf (中文版) + + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版) + +.. 
note:: + - CPUINTC:即《龍芯架構參考手冊卷一》第7.4節所描述的CSR.ECFG/CSR.ESTAT寄存器及其 + 中斷控制邏輯; + - LIOINTC:即《龍芯3A5000處理器使用手冊》第11.1節所描述的“傳統I/O中斷”; + - EIOINTC:即《龍芯3A5000處理器使用手冊》第11.2節所描述的“擴展I/O中斷”; + - HTVECINTC:即《龍芯3A5000處理器使用手冊》第14.3節所描述的“HyperTransport中斷”; + - PCH-PIC/PCH-MSI:即《龍芯7A1000橋片用戶手冊》第5章所描述的“中斷控制器”; + - PCH-LPC:即《龍芯7A1000橋片用戶手冊》第24.3節所描述的“LPC中斷”。 + diff --git a/Documentation/translations/zh_TW/arch/mips/booting.rst b/Documentation/translations/zh_TW/arch/mips/booting.rst new file mode 100644 index 0000000000..7e104abf5a --- /dev/null +++ b/Documentation/translations/zh_TW/arch/mips/booting.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/mips/booting.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_booting: + +BMIPS設備樹引導 +------------------------ + + 一些bootloaders只支持在內核鏡像開始地址處的單一入口點。而其它 + bootloaders將跳轉到ELF的開始地址處。兩種方案都支持的;因爲 + CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y, 所以第一條指令 + 會立即跳轉到kernel_entry()入口處執行。 + + 與arch/arm情況(b)類似,dt感知的引導加載程序需要設置以下寄存器: + + a0 : 0 + + a1 : 0xffffffff + + a2 : RAM中指向設備樹塊的物理指針(在chapterII中定義)。 + 設備樹可以位於前512MB物理地址空間(0x00000000 - + 0x1fffffff)的任何位置,以64位邊界對齊。 + + 傳統bootloaders不會使用這樣的約定,並且它們不傳入DT塊。 + 在這種情況下,Linux將通過選中CONFIG_DT_*查找DTB。 + + 以上約定只在32位系統中定義,因爲目前沒有任何64位的BMIPS實現。 + diff --git a/Documentation/translations/zh_TW/arch/mips/features.rst b/Documentation/translations/zh_TW/arch/mips/features.rst new file mode 100644 index 0000000000..3d3906c4d0 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/mips/features.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/mips/features.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_features: + +.. 
kernel-feat:: features mips + diff --git a/Documentation/translations/zh_TW/arch/mips/index.rst b/Documentation/translations/zh_TW/arch/mips/index.rst new file mode 100644 index 0000000000..4b7d288064 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/mips/index.rst @@ -0,0 +1,30 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/mips/index.rst + +:翻譯: + + 司延騰 Yanteng Si + +=========================== +MIPS特性文檔 +=========================== + +.. toctree:: + :maxdepth: 2 + :numbered: + + booting + ingenic-tcu + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` + diff --git a/Documentation/translations/zh_TW/arch/mips/ingenic-tcu.rst b/Documentation/translations/zh_TW/arch/mips/ingenic-tcu.rst new file mode 100644 index 0000000000..4385c0f3e9 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/mips/ingenic-tcu.rst @@ -0,0 +1,73 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/mips/ingenic-tcu.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. 
_tw_ingenic-tcu: + +=============================================== +君正 JZ47xx SoC定時器/計數器硬件單元 +=============================================== + +君正 JZ47xx SoC中的定時器/計數器單元(TCU)是一個多功能硬件塊。它有多達 +8個通道,可以用作計數器,計時器,或脈衝寬度調製器。 + +- JZ4725B, JZ4750, JZ4755 只有6個TCU通道。其它SoC都有8個通道。 + +- JZ4725B引入了一個獨立的通道,稱爲操作系統計時器(OST)。這是一個32位可 + 編程定時器。在JZ4760B及以上型號上,它是64位的。 + +- 每個TCU通道都有自己的時鐘源,可以通過 TCSR 寄存器設置通道的父級時鐘 + 源(pclk、ext、rtc)、開關以及分頻。 + + - 看門狗和OST硬件模塊在它們的寄存器空間中也有相同形式的TCSR寄存器。 + - 用於關閉/開啓的 TCU 寄存器也可以關閉/開啓看門狗和 OST 時鐘。 + +- 每個TCU通道在兩種模式的其中一種模式下運行: + + - 模式 TCU1:通道無法在睡眠模式下運行,但更易於操作。 + - 模式 TCU2:通道可以在睡眠模式下運行,但操作比 TCU1 通道複雜一些。 + +- 每個 TCU 通道的模式取決於使用的SoC: + + - 在最老的SoC(直到JZ4740),八個通道都運行在TCU1模式。 + - 在 JZ4725B,通道5運行在TCU2,其它通道則運行在TCU1。 + - 在最新的SoC(JZ4750及之後),通道1-2運行在TCU2,其它通道則運行 + 在TCU1。 + +- 每個通道都可以生成中斷。有些通道共享一條中斷線,而有些沒有,其在SoC型 + 號之間的變更: + + - 在很老的SoC(JZ4740及更低),通道0和通道1有它們自己的中斷線;通 + 道2-7共享最後一條中斷線。 + - 在 JZ4725B,通道0有它自己的中斷線;通道1-5共享一條中斷線;OST + 使用最後一條中斷線。 + - 在比較新的SoC(JZ4750及以後),通道5有它自己的中斷線;通 + 道0-4和(如果是8通道)6-7全部共享一條中斷線;OST使用最後一條中 + 斷線。 + +實現 +==== + +TCU硬件的功能分佈在多個驅動程序: + +============== =================================== +時鐘 drivers/clk/ingenic/tcu.c +中斷 drivers/irqchip/irq-ingenic-tcu.c +定時器 drivers/clocksource/ingenic-timer.c +OST drivers/clocksource/ingenic-ost.c +脈衝寬度調製器 drivers/pwm/pwm-jz4740.c +看門狗 drivers/watchdog/jz4740_wdt.c +============== =================================== + +因爲可以從相同的寄存器控制屬於不同驅動程序和框架的TCU的各種功能,所以 +所有這些驅動程序都通過相同的控制總線通用接口訪問它們的寄存器。 + +有關TCU驅動程序的設備樹綁定的更多信息,請參閱: +Documentation/devicetree/bindings/timer/ingenic,tcu.yaml. + diff --git a/Documentation/translations/zh_TW/arch/openrisc/index.rst b/Documentation/translations/zh_TW/arch/openrisc/index.rst new file mode 100644 index 0000000000..7585960783 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/openrisc/index.rst @@ -0,0 +1,33 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/openrisc/index.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. 
_tw_openrisc_index: + +================= +OpenRISC 體系架構 +================= + +.. toctree:: + :maxdepth: 2 + + openrisc_port + todo + +Todolist: + features + + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` + diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst new file mode 100644 index 0000000000..422fe9f7a3 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -0,0 +1,128 @@ +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/openrisc/openrisc_port.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_openrisc_port: + +============== +OpenRISC Linux +============== + +這是Linux對OpenRISC類微處理器的移植;具體來說,最早移植目標是32位 +OpenRISC 1000系列(或1k)。 + +關於OpenRISC處理器和正在進行中的開發的信息: + + ======= ============================= + 網站 https://openrisc.io + 郵箱 openrisc@lists.librecores.org + ======= ============================= + +--------------------------------------------------------------------- + +OpenRISC工具鏈和Linux的構建指南 +=============================== + +爲了構建和運行Linux for OpenRISC,你至少需要一個基本的工具鏈,或許 +還需要架構模擬器。 這裏概述了準備就位這些部分的步驟。 + +1) 工具鏈 + +工具鏈二進制文件可以從openrisc.io或我們的github發佈頁面獲得。不同 +工具鏈的構建指南可以在openrisc.io或Stafford的工具鏈構建和發佈腳本 +中找到。 + + ====== ================================================= + 二進制 https://github.com/openrisc/or1k-gcc/releases + 工具鏈 https://openrisc.io/software + 構建 https://github.com/stffrdhrn/or1k-toolchain-build + ====== ================================================= + +2) 構建 + +像往常一樣構建Linux內核:: + + make ARCH=openrisc CROSS_COMPILE="or1k-linux-" defconfig + make ARCH=openrisc CROSS_COMPILE="or1k-linux-" + +3) 在FPGA上運行(可選) + +OpenRISC社區通常使用FuseSoC來管理構建和編程SoC到FPGA中。 下面是用 +OpenRISC SoC對De0 Nano開發板進行編程的一個例子。 在構建過程中, +FPGA RTL是從FuseSoC IP核庫中下載的代碼,並使用FPGA供應商工具構建。 +二進制文件用openocd加載到電路板上。 + +:: + + git clone https://github.com/olofk/fusesoc + cd fusesoc + sudo pip install -e . 
+ + fusesoc init + fusesoc build de0_nano + fusesoc pgm de0_nano + + openocd -f interface/altera-usb-blaster.cfg \ + -f board/or1k_generic.cfg + + telnet localhost 4444 + > init + > halt; load_image vmlinux ; reset + +4) 在模擬器上運行(可選) + +QEMU是一個處理器仿真器,我們推薦它來模擬OpenRISC平臺。 請按照QEMU網 +站上的OpenRISC說明,讓Linux在QEMU上運行。 你可以自己構建QEMU,但你的 +Linux發行版可能提供了支持OpenRISC的二進制包。 + + ============= ====================================================== + qemu openrisc https://wiki.qemu.org/Documentation/Platforms/OpenRISC + ============= ====================================================== + +--------------------------------------------------------------------- + +術語表 +====== + +代碼中使用了以下符號約定以將範圍限制在幾個特定處理器實現上: + +========= ======================= +openrisc: OpenRISC類型處理器 +or1k: OpenRISC 1000系列處理器 +or1200: OpenRISC 1200處理器 +========= ======================= + +--------------------------------------------------------------------- + +歷史 +==== + +2003-11-18 Matjaz Breskvar (phoenix@bsemi.com) + 將linux初步移植到OpenRISC/or32架構。 + 所有的核心功能都實現了,並且可以使用。 + +2003-12-08 Matjaz Breskvar (phoenix@bsemi.com) + 徹底改變TLB失誤處理。 + 重寫異常處理。 + 在默認的initrd中實現了sash-3.6的所有功能。 + 大幅改進的版本。 + +2004-04-10 Matjaz Breskvar (phoenix@bsemi.com) + 大量的bug修復。 + 支持以太網,http和telnet服務器功能。 + 可以運行許多標準的linux應用程序。 + +2004-06-26 Matjaz Breskvar (phoenix@bsemi.com) + 移植到2.6.x。 + +2004-11-30 Matjaz Breskvar (phoenix@bsemi.com) + 大量的bug修復和增強功能。 + 增加了opencores framebuffer驅動。 + +2010-10-09 Jonas Bonn (jonas@southpole.se) + 重大重寫,使其與上游的Linux 2.6.36看齊。 + diff --git a/Documentation/translations/zh_TW/arch/openrisc/todo.rst b/Documentation/translations/zh_TW/arch/openrisc/todo.rst new file mode 100644 index 0000000000..df261b9e30 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/openrisc/todo.rst @@ -0,0 +1,24 @@ +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/openrisc/todo.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. 
_tw_openrisc_todo.rst: + +======== +待辦事項 +======== + +OpenRISC Linux的移植已經完全投入使用,並且從 2.6.35 開始就一直在上游同步。 +然而,還有一些剩餘的項目需要在未來幾個月內完成。 下面是一個即將進行調查的已知 +不盡完美的項目列表,即我們的待辦事項列表。 + +- 實現其餘的DMA API……dma_map_sg等。 + +- 完成重命名清理工作……代碼中提到了or32,這是架構的一個老名字。 我們 + 已經確定的名字是or1k,這個改變正在以緩慢積累的方式進行。 目前,or32相當 + 於or1k。 + diff --git a/Documentation/translations/zh_TW/arch/parisc/debugging.rst b/Documentation/translations/zh_TW/arch/parisc/debugging.rst new file mode 100644 index 0000000000..c9ee804aeb --- /dev/null +++ b/Documentation/translations/zh_TW/arch/parisc/debugging.rst @@ -0,0 +1,46 @@ +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/parisc/debugging.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_parisc_debugging: + +================= +調試PA-RISC +================= + +好吧,這裏有一些關於調試linux/parisc的較底層部分的信息。 + + +1. 絕對地址 +===================== + +很多彙編代碼目前運行在實模式下,這意味着會使用絕對地址,而不是像內核其他 +部分那樣使用虛擬地址。要將絕對地址轉換爲虛擬地址,你可以在System.map中查 +找,添加__PAGE_OFFSET(目前是0x10000000)。 + + +2. HPMCs +======== + +當實模式的代碼試圖訪問不存在的內存時,會出現HPMC(high priority machine +check)而不是內核oops。若要調試HPMC,請嘗試找到系統響應程序/請求程序地址。 +系統請求程序地址應該與(某)處理器的HPA(I/O範圍內的高地址)相匹配;系統響應程 +序地址是實模式代碼試圖訪問的地址。 + +系統響應程序地址的典型值是大於__PAGE_OFFSET (0x10000000)的地址,這意味着 +在實模式試圖訪問它之前,虛擬地址沒有被翻譯成物理地址。 + + +3. 有趣的Q位 +============ + +某些非常關鍵的代碼必須清除PSW中的Q位。當Q位被清除時,CPU不會更新中斷處理 +程序所讀取的寄存器,以找出機器被中斷的位置——所以如果你在清除Q位的指令和再 +次設置Q位的RFI之間遇到中斷,你不知道它到底發生在哪裏。如果你幸運的話,IAOQ +會指向清除Q位的指令,如果你不幸運的話,它會指向任何地方。通常Q位的問題會 +表現爲無法解釋的系統掛起或物理內存越界。 + diff --git a/Documentation/translations/zh_TW/arch/parisc/index.rst b/Documentation/translations/zh_TW/arch/parisc/index.rst new file mode 100644 index 0000000000..35941bf68c --- /dev/null +++ b/Documentation/translations/zh_TW/arch/parisc/index.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/parisc/index.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_parisc_index: + +==================== +PA-RISC體系架構 +==================== + +.. 
toctree:: + :maxdepth: 2 + + debugging + registers + +Todolist: + + features + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` + diff --git a/Documentation/translations/zh_TW/arch/parisc/registers.rst b/Documentation/translations/zh_TW/arch/parisc/registers.rst new file mode 100644 index 0000000000..695acb2113 --- /dev/null +++ b/Documentation/translations/zh_TW/arch/parisc/registers.rst @@ -0,0 +1,157 @@ +.. include:: ../../disclaimer-zh_TW.rst + +:Original: Documentation/arch/parisc/registers.rst + +:翻譯: + + 司延騰 Yanteng Si + +.. _tw_parisc_registers: + +========================= +Linux/PA-RISC的寄存器用法 +========================= + +[ 用星號表示目前尚未實現的計劃用途。 ] + +ABI約定的通用寄存器 +=================== + +控制寄存器 +---------- + +============================ ================================= +CR 0 (恢復計數器) 用於ptrace +CR 1-CR 7(無定義) 未使用 +CR 8 (Protection ID) 每進程值* +CR 9, 12, 13 (PIDS) 未使用 +CR10 (CCR) FPU延遲保存* +CR11 按照ABI的規定(SAR) +CR14 (中斷向量) 初始化爲 fault_vector +CR15 (EIEM) 所有位初始化爲1* +CR16 (間隔計時器) 讀取週期數/寫入開始時間間隔計時器 +CR17-CR22 中斷參數 +CR19 中斷指令寄存器 +CR20 中斷空間寄存器 +CR21 中斷偏移量寄存器 +CR22 中斷 PSW +CR23 (EIRR) 讀取未決中斷/寫入清除位 +CR24 (TR 0) 內核空間頁目錄指針 +CR25 (TR 1) 用戶空間頁目錄指針 +CR26 (TR 2) 不使用 +CR27 (TR 3) 線程描述符指針 +CR28 (TR 4) 不使用 +CR29 (TR 5) 不使用 +CR30 (TR 6) 當前 / 0 +CR31 (TR 7) 臨時寄存器,在不同地方使用 +============================ ================================= + +空間寄存器(內核模式) +---------------------- + +======== ============================== +SR0 臨時空間寄存器 +SR4-SR7 設置爲0 +SR1 臨時空間寄存器 +SR2 內核不應該破壞它 +SR3 用於用戶空間訪問(當前進程) +======== ============================== + +空間寄存器(用戶模式) +---------------------- + +======== ============================ +SR0 臨時空間寄存器 +SR1 臨時空間寄存器 +SR2 保存Linux gateway page的空間 +SR3 在內核中保存用戶地址空間的值 +SR4-SR7 定義了用戶/內核的短地址空間 +======== ============================ + + +處理器狀態字 +------------ + +====================== ================================================ +W (64位地址) 0 +E (小尾端) 0 +S (安全間隔計時器) 0 +T (產生分支陷阱) 0 +H (高特權級陷阱) 0 +L (低特權級陷阱) 0 +N (撤銷下一條指令) 被C代碼使用 +X (數據存儲中斷禁用) 0 +B 
(產生分支) 被C代碼使用 +C (代碼地址轉譯) 1, 在執行實模式代碼時爲0 +V (除法步長校正) 被C代碼使用 +M (HPMC 掩碼) 0, 在執行HPMC操作*時爲1 +C/B (進/借 位) 被C代碼使用 +O (有序引用) 1* +F (性能監視器) 0 +R (回收計數器陷阱) 0 +Q (收集中斷狀態) 1 (在rfi之前的代碼中爲0) +P (保護標識符) 1* +D (數據地址轉譯) 1, 在執行實模式代碼時爲0 +I (外部中斷掩碼) 由cli()/sti()宏使用。 +====================== ================================================ + +“隱形”寄存器(影子寄存器) +--------------------------- + +============= =================== +PSW W 默認值 0 +PSW E 默認值 0 +影子寄存器 被中斷處理代碼使用 +TOC啓用位 1 +============= =================== + +---------------------------------------------------------- + +PA-RISC架構定義了7個寄存器作爲“影子寄存器”。這些寄存器在 +RETURN FROM INTERRUPTION AND RESTORE指令中使用,通過消 +除中斷處理程序中對一般寄存器(GR)的保存和恢復的需要來減 +少狀態保存和恢復時間。影子寄存器是GRs 1, 8, 9, 16, 17, +24和25。 + +------------------------------------------------------------------------- + +寄存器使用說明,最初由John Marvin提供,並由Randolph Chung提供一些補充說明。 + +對於通用寄存器: + +r1,r2,r19-r26,r28,r29 & r31可以在不保存它們的情況下被使用。當然,如果你 +關心它們,在調用另一個程序之前,你也需要保存它們。上面的一些寄存器確實 +有特殊的含義,你應該注意一下: + + r1: + addil指令是硬性規定將其結果放在r1中,所以如果你使用這條指令要 + 注意這點。 + + r2: + 這就是返回指針。一般來說,你不想使用它,因爲你需要這個指針來返 + 回給你的調用者。然而,它與這組寄存器組合在一起,因爲調用者不能 + 依賴你返回時的值是相同的,也就是說,你可以將r2複製到另一個寄存 + 器,並在作廢r2後通過該寄存器返回,這應該不會給調用程序帶來問題。 + + r19-r22: + 這些通常被認爲是臨時寄存器。 + 請注意,在64位中它們是arg7-arg4。 + + r23-r26: + 這些是arg3-arg0,也就是說,如果你不再關心傳入的值,你可以使用 + 它們。 + + r28,r29: + 這倆是ret0和ret1。它們是你傳入返回值的地方。r28是主返回值。當返回 + 小結構體時,r29也可以用來將數據傳回給調用程序。 + + r30: + 棧指針 + + r31: + ble指令將返回指針放在這裏。 + + + r3-r18,r27,r30需要被保存和恢復。r3-r18只是一般用途的寄存器。 + r27是數據指針,用來使對全局變量的引用更容易。r30是棧指針。 + diff --git a/Documentation/translations/zh_TW/cpu-freq/core.rst b/Documentation/translations/zh_TW/cpu-freq/core.rst index f1951e1b23..4f98d1e9f3 100644 --- a/Documentation/translations/zh_TW/cpu-freq/core.rst +++ b/Documentation/translations/zh_TW/cpu-freq/core.rst @@ -1,13 +1,15 @@ .. SPDX-License-Identifier: GPL-2.0 - .. 
include:: ../disclaimer-zh_TW.rst -:Original: :doc:`../../../cpu-freq/core` -:Translator: Yanteng Si - Hu Haowen +:Original: Documentation/cpu-freq/core.rst + +:翻譯: + + 司延騰 Yanteng Si -.. _tw_core.rst: +:校譯: + 唐藝舟 Tang Yizhou ==================================== CPUFreq核心和CPUFreq通知器的通用說明 @@ -29,10 +31,10 @@ CPUFreq核心和CPUFreq通知器的通用說明 ====================== cpufreq核心代碼位於drivers/cpufreq/cpufreq.c中。這些cpufreq代碼爲CPUFreq架構的驅 -動程序(那些操作硬體切換頻率的代碼)以及 "通知器 "提供了一個標準化的接口。 -這些是設備驅動程序或需要了解策略變化的其它內核部分(如 ACPI 熱量管理)或所有頻率更改(除 -計時代碼外),甚至需要強制確定速度限制的通知器(如 ARM 架構上的 LCD 驅動程序)。 -此外, 內核 "常數" loops_per_jiffy會根據頻率變化而更新。 +動程序(那些執行硬件頻率切換的代碼)以及 "通知器" 提供了一個標準化的接口。 +包括設備驅動程序;需要了解策略變化(如 ACPI 熱量管理),或所有頻率變化(如計時代碼), +甚至需要強制限制爲指定頻率(如 ARM 架構上的 LCD 驅動程序)的其它內核組件。 +此外,內核 "常數" loops_per_jiffy 會根據頻率變化而更新。 cpufreq策略的引用計數由 cpufreq_cpu_get 和 cpufreq_cpu_put 來完成,以確保 cpufreq 驅 動程序被正確地註冊到核心中,並且驅動程序在 cpufreq_put_cpu 被調用之前不會被卸載。這也保證 @@ -41,10 +43,10 @@ cpufreq策略的引用計數由 cpufreq_cpu_get 和 cpufreq_cpu_put 來完成, 2. CPUFreq 通知器 ==================== -CPUFreq通知器符合標準的內核通知器接口。 +CPUFreq通知器遵循標準的內核通知器接口。 關於通知器的細節請參閱 linux/include/linux/notifier.h。 -這裡有兩個不同的CPUfreq通知器 - 策略通知器和轉換通知器。 +這裏有兩個不同的CPUfreq通知器 - 策略通知器和轉換通知器。 2.1 CPUFreq策略通知器 @@ -62,27 +64,27 @@ CPUFreq通知器符合標準的內核通知器接口。 2.2 CPUFreq轉換通知器 -------------------------------- -當CPUfreq驅動切換CPU核心頻率時,策略中的每個在線CPU都會收到兩次通知,這些變化沒有任何外部干 +當CPUfreq驅動切換CPU核心頻率時,策略中的每個在線CPU都會收到兩次通知,這些變化沒有任何外部干 預。 第二個參數指定階段 - CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE. 第三個參數是一個包含如下值的結構體cpufreq_freqs: -===== ==================== -cpu 受影響cpu的編號 +====== =============================== +policy 指向struct cpufreq_policy的指針 old 舊頻率 new 新頻率 flags cpufreq驅動的標誌 -===== ==================== +====== =============================== 3. 
含有Operating Performance Point (OPP)的CPUFreq表的生成 ================================================================== 關於OPP的細節請參閱 Documentation/power/opp.rst dev_pm_opp_init_cpufreq_table - - 這個功能提供了一個隨時可用的轉換程序,用來將OPP層關於可用頻率的內部信息翻譯成一種容易提供給 - cpufreq的格式。 + 這個函數提供了一個隨時可用的轉換例程,用來將OPP層關於可用頻率的內部信息翻譯成一種 + cpufreq易於處理的格式。 .. Warning:: @@ -101,7 +103,7 @@ dev_pm_opp_init_cpufreq_table - .. note:: - 該函數只有在CONFIG_PM_OPP之外還啓用了CONFIG_CPU_FREQ時才可用。 + 該函數只有在CONFIG_PM_OPP之外還啓用了CONFIG_CPU_FREQ時纔可用。 dev_pm_opp_free_cpufreq_table 釋放dev_pm_opp_init_cpufreq_table分配的表。 diff --git a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst index 671b1bf0e2..add3de2d45 100644 --- a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst +++ b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst @@ -2,12 +2,15 @@ .. include:: ../disclaimer-zh_TW.rst -:Original: :doc:`../../../cpu-freq/cpu-drivers` -:Translator: Yanteng Si - Hu Haowen +:Original: Documentation/cpu-freq/cpu-drivers.rst -.. _tw_cpu-drivers.rst: +:翻譯: + 司延騰 Yanteng Si + +:校譯: + + 唐藝舟 Tang Yizhou ======================================= 如何實現一個新的CPUFreq處理器驅動程序? @@ -37,15 +40,15 @@ 1. 怎麼做? =========== -如此,你剛剛得到了一個全新的CPU/晶片組及其數據手冊,並希望爲這個CPU/晶片組添加cpufreq -支持?很好,這裡有一些至關重要的提示: +如果,你剛剛得到了一個全新的CPU/芯片組及其數據手冊,並希望爲這個CPU/芯片組添加cpufreq +支持?很好,這裏有一些至關重要的提示: 1.1 初始化 ---------- -首先,在__initcall_level_7 (module_init())或更靠後的函數中檢查這個內核是否 -運行在正確的CPU和正確的晶片組上。如果是,則使用cpufreq_register_driver()向 +首先,在 __initcall level 7 (module_init())或更靠後的函數中檢查這個內核是否 +運行在正確的CPU和正確的芯片組上。如果是,則使用cpufreq_register_driver()向 CPUfreq核心層註冊一個cpufreq_driver結構體。 結構體cpufreq_driver應該包含什麼成員? 
@@ -59,11 +62,11 @@ CPUfreq核心層註冊一個cpufreq_driver結構體。 .setpolicy 或 .fast_switch 或 .target 或 .target_index - 差異見 下文。 -並且可選擇 +其它可選成員 - .flags - cpufreq核的提示。 + .flags - 給cpufreq核心的提示。 - .driver_data - cpufreq驅動程序的特定數據。 + .driver_data - cpufreq驅動程序的特有數據。 .get_intermediate 和 target_intermediate - 用於在改變CPU頻率時切換到穩定 的頻率。 @@ -72,18 +75,18 @@ CPUfreq核心層註冊一個cpufreq_driver結構體。 .bios_limit - 返回HW/BIOS對CPU的最大頻率限制值。 - .exit - 一個指向per-policy清理函數的指針,該函數在cpu熱插拔過程的CPU_POST_DEAD + .exit - 一個指向per-policy清理函數的指針,該函數在CPU熱插拔過程的CPU_POST_DEAD 階段被調用。 .suspend - 一個指向per-policy暫停函數的指針,該函數在關中斷且在該策略的調節器停止 後被調用。 - .resume - 一個指向per-policy恢復函數的指針,該函數在關中斷且在調節器再一次開始前被 + .resume - 一個指向per-policy恢復函數的指針,該函數在關中斷且在調節器再一次啓動前被 調用。 .ready - 一個指向per-policy準備函數的指針,該函數在策略完全初始化之後被調用。 - .attr - 一個指向NULL結尾的"struct freq_attr"列表的指針,該函數允許導出值到 + .attr - 一個指向NULL結尾的"struct freq_attr"列表的指針,該列表允許導出值到 sysfs。 .boost_enabled - 如果設置,則啓用提升(boost)頻率。 @@ -94,95 +97,93 @@ CPUfreq核心層註冊一個cpufreq_driver結構體。 1.2 Per-CPU 初始化 ------------------ -每當一個新的CPU被註冊到設備模型中,或者在cpufreq驅動註冊自己之後,如果此CPU的cpufreq策 -略不存在,則會調用per-policy的初始化函數cpufreq_driver.init。請注意,.init()和.exit()程序 -只對策略調用一次,而不是對策略管理的每個CPU調用一次。它需要一個 ``struct cpufreq_policy +每當一個新的CPU被註冊到設備模型中,或者當cpufreq驅動註冊自身之後,如果此CPU的cpufreq策 +略不存在,則會調用per-policy的初始化函數cpufreq_driver.init。請注意,.init()和.exit()例程 +只爲某個策略調用一次,而不是對該策略管理的每個CPU調用一次。它需要一個 ``struct cpufreq_policy *policy`` 作爲參數。現在該怎麼做呢? 
如果有必要,請在你的CPU上激活CPUfreq功能支持。 -然後,驅動程序必須填寫以下數值: +然後,驅動程序必須填寫以下值: +-----------------------------------+--------------------------------------+ -|policy->cpuinfo.min_freq 和 | | -|policy->cpuinfo.max_freq | 該CPU支持的最低和最高頻率(kHz) | -| | | -| | | +|policy->cpuinfo.min_freq和 | 該CPU支持的最低和最高頻率(kHz) | +|policy->cpuinfo.max_freq | | +| | | +-----------------------------------+--------------------------------------+ -|policy->cpuinfo.transition_latency | | -| | CPU在兩個頻率之間切換所需的時間,以 | -| | 納秒爲單位(如適用,否則指定 | -| | CPUFREQ_ETERNAL) | +|policy->cpuinfo.transition_latency | CPU在兩個頻率之間切換所需的時間,以 | +| | 納秒爲單位(如不適用,設定爲 | +| | CPUFREQ_ETERNAL) | +| | | +-----------------------------------+--------------------------------------+ -|policy->cur | 該CPU當前的工作頻率(如適用) | -| | | +|policy->cur | 該CPU當前的工作頻率(如適用) | +| | | +-----------------------------------+--------------------------------------+ -|policy->min, | | -|policy->max, | | -|policy->policy and, if necessary, | | -|policy->governor | 必須包含該cpu的 「默認策略」。稍後 | -| | 會用這些值調用 | -| | cpufreq_driver.verify and either | -| | cpufreq_driver.setpolicy or | -| | cpufreq_driver.target/target_index | -| | | +|policy->min, | 必須包含該CPU的"默認策略"。稍後 | +|policy->max, | 會用這些值調用 | +|policy->policy and, if necessary, | cpufreq_driver.verify和下面函數 | +|policy->governor | 之一:cpufreq_driver.setpolicy或 | +| | cpufreq_driver.target/target_index | +| | | +-----------------------------------+--------------------------------------+ -|policy->cpus | 用與這個CPU一起做DVFS的(在線+離線) | -| | CPU(即與它共享時鐘/電壓軌)的掩碼更新 | -| | 這個 | -| | | +|policy->cpus | 該policy通過DVFS框架影響的全部CPU | +| | (即與本CPU共享"時鐘/電壓"對)構成 | +| | 掩碼(同時包含在線和離線CPU),用掩碼 | +| | 更新本字段 | +| | | +-----------------------------------+--------------------------------------+ -對於設置其中的一些值(cpuinfo.min[max]_freq, policy->min[max]),頻率表助手可能會有幫 +對於設置其中的一些值(cpuinfo.min[max]_freq, policy->min[max]),頻率表輔助函數可能會有幫 助。關於它們的更多信息,請參見第2節。 1.3 驗證 -------- -當用戶決定設置一個新的策略(由 「policy,governor,min,max組成」)時,必須對這個策略進行驗證, 
+當用戶決定設置一個新的策略(由"policy,governor,min,max組成")時,必須對這個策略進行驗證, 以便糾正不兼容的值。爲了驗證這些值,cpufreq_verify_within_limits(``struct cpufreq_policy *policy``, ``unsigned int min_freq``, ``unsigned int max_freq``)函數可能會有幫助。 -關於頻率表助手的詳細內容請參見第2節。 +關於頻率表輔助函數的詳細內容請參見第2節。 您需要確保至少有一個有效頻率(或工作範圍)在 policy->min 和 policy->max 範圍內。如果有必 -要,先增加policy->max,只有在沒有辦法的情況下,才減少policy->min。 +要,先增大policy->max,只有在沒有解決方案的情況下,才減小policy->min。 1.4 target 或 target_index 或 setpolicy 或 fast_switch? ------------------------------------------------------- -大多數cpufreq驅動甚至大多數cpu頻率升降算法只允許將CPU頻率設置爲預定義的固定值。對於這些,你 +大多數cpufreq驅動甚至大多數CPU頻率升降算法只允許將CPU頻率設置爲預定義的固定值。對於這些,你 可以使用->target(),->target_index()或->fast_switch()回調。 -有些cpufreq功能的處理器可以自己在某些限制之間切換頻率。這些應使用->setpolicy()回調。 +有些具有硬件調頻能力的處理器可以自行依據某些限制來切換CPU頻率。它們應使用->setpolicy()回調。 1.5. target/target_index ------------------------ -target_index調用有兩個參數:``struct cpufreq_policy * policy``和``unsigned int`` -索引(於列出的頻率表)。 +target_index調用有兩個參數: ``struct cpufreq_policy * policy`` 和 ``unsigned int`` +索引(用於索引頻率表項)。 -當調用這裡時,CPUfreq驅動必須設置新的頻率。實際頻率必須由freq_table[index].frequency決定。 +當調用這裏時,CPUfreq驅動必須設置新的頻率。實際頻率必須由freq_table[index].frequency決定。 -它應該總是在錯誤的情況下恢復到之前的頻率(即policy->restore_freq),即使我們之前切換到中間頻率。 +在發生錯誤的情況下總是應該恢復到之前的頻率(即policy->restore_freq),即使我們已經切換到了 +中間頻率。 已棄用 ---------- -目標調用有三個參數。``struct cpufreq_policy * policy``, unsigned int target_frequency, +target調用有三個參數。``struct cpufreq_policy * policy``, unsigned int target_frequency, unsigned int relation. -CPUfreq驅動在調用這裡時必須設置新的頻率。實際的頻率必須使用以下規則來確定。 +CPUfreq驅動在調用這裏時必須設置新的頻率。實際的頻率必須使用以下規則來確定。 -- 緊跟 "目標頻率"。 +- 儘量貼近"目標頻率"。 - policy->min <= new_freq <= policy->max (這必須是有效的!!!) - 如果 relation==CPUFREQ_REL_L,嘗試選擇一個高於或等於 target_freq 的 new_freq。("L代表 最低,但不能低於") - 如果 relation==CPUFREQ_REL_H,嘗試選擇一個低於或等於 target_freq 的 new_freq。("H代表 最高,但不能高於") -這裡,頻率表助手可能會幫助你--詳見第2節。 +這裏,頻率表輔助函數可能會幫助你 -- 詳見第2節。 1.6. 
fast_switch ---------------- @@ -196,51 +197,52 @@ CPUfreq驅動在調用這裡時必須設置新的頻率。實際的頻率必須 1.7 setpolicy ------------- -setpolicy調用只需要一個``struct cpufreq_policy * policy``作爲參數。需要將處理器內或晶片組內動態頻 +setpolicy調用只需要一個 ``struct cpufreq_policy * policy`` 作爲參數。需要將處理器內或芯片組內動態頻 率切換的下限設置爲policy->min,上限設置爲policy->max,如果支持的話,當policy->policy爲 -CPUFREQ_POLICY_PERFORMANCE時選擇面向性能的設置,當CPUFREQ_POLICY_POWERSAVE時選擇面向省電的設置。 +CPUFREQ_POLICY_PERFORMANCE時選擇面向性能的設置,爲CPUFREQ_POLICY_POWERSAVE時選擇面向省電的設置。 也可以查看drivers/cpufreq/longrun.c中的參考實現。 1.8 get_intermediate 和 target_intermediate -------------------------------------------- -僅適用於 target_index() 和 CPUFREQ_ASYNC_NOTIFICATION 未設置的驅動。 +僅適用於未設置 target_index() 和 CPUFREQ_ASYNC_NOTIFICATION 的驅動。 -get_intermediate應該返回一個平台想要切換到的穩定的中間頻率,target_intermediate()應該將CPU設置爲 -該頻率,然後再跳轉到'index'對應的頻率。核心會負責發送通知,驅動不必在target_intermediate()或 -target_index()中處理。 +get_intermediate應該返回一個平臺想要切換到的穩定的中間頻率,target_intermediate()應該將CPU設置爲 +該頻率,然後再跳轉到'index'對應的頻率。cpufreq核心會負責發送通知,驅動不必在 +target_intermediate()或target_index()中處理它們。 -在驅動程序不想因爲某個目標頻率切換到中間頻率的情況下,它們可以從get_intermediate()中返回'0'。在這種情況 -下,核心將直接調用->target_index()。 +在驅動程序不想爲某個目標頻率切換到中間頻率的情況下,它們可以讓get_intermediate()返回'0'。 +在這種情況下,cpufreq核心將直接調用->target_index()。 -注意:->target_index()應該在失敗的情況下恢復到policy->restore_freq,因爲core會爲此發送通知。 +注意:->target_index()應該在發生失敗的情況下將頻率恢復到policy->restore_freq, +因爲cpufreq核心會爲此發送通知。 -2. 頻率表助手 -============= +2. 
頻率表輔助函數 +================= -由於大多數cpufreq處理器只允許被設置爲幾個特定的頻率,因此,一個帶有一些函數的 「頻率表」可能會輔助處理器驅動 -程序的一些工作。這樣的 "頻率表" 由一個cpufreq_frequency_table條目構成的數組組成,"driver_data" 中包 -含了驅動程序的具體數值,"frequency" 中包含了相應的頻率,並設置了標誌。在表的最後,需要添加一個 -cpufreq_frequency_table條目,頻率設置爲CPUFREQ_TABLE_END。而如果想跳過表中的一個條目,則將頻率設置爲 -CPUFREQ_ENTRY_INVALID。這些條目不需要按照任何特定的順序排序,但如果它們是cpufreq 核心會對它們進行快速的DVFS, +由於大多數支持cpufreq的處理器只允許被設置爲幾個特定的頻率,因此,"頻率表"和一些相關函數可能會輔助處理器驅動 +程序的一些工作。這樣的"頻率表"是一個由struct cpufreq_frequency_table的條目構成的數組,"driver_data"成員包 +含驅動程序的專用值,"frequency"成員包含了相應的頻率,此外還有標誌成員。在表的最後,需要添加一個 +cpufreq_frequency_table條目,頻率設置爲CPUFREQ_TABLE_END。如果想跳過表中的一個條目,則將頻率設置爲 +CPUFREQ_ENTRY_INVALID。這些條目不需要按照任何特定的順序排序,如果排序了,cpufreq核心執行DVFS會更快一點, 因爲搜索最佳匹配會更快。 -如果策略在其policy->freq_table欄位中包含一個有效的指針,cpufreq表就會被核心自動驗證。 +如果在policy->freq_table字段中包含一個有效的頻率表指針,頻率表就會被cpufreq核心自動驗證。 cpufreq_frequency_table_verify()保證至少有一個有效的頻率在policy->min和policy->max範圍內,並且所有其他 -標準都被滿足。這對->verify調用很有幫助。 +準則都被滿足。這對->verify調用很有幫助。 -cpufreq_frequency_table_target()是對應於->target階段的頻率表助手。只要把數值傳遞給這個函數,這個函數就會返 +cpufreq_frequency_table_target()是對應於->target階段的頻率表輔助函數。只要把值傳遞給這個函數,這個函數就會返 回包含CPU要設置的頻率的頻率表條目。 -以下宏可以作爲cpufreq_frequency_table的疊代器。 +以下宏可以作爲cpufreq_frequency_table的迭代器。 cpufreq_for_each_entry(pos, table) - 遍歷頻率表的所有條目。 cpufreq_for_each_valid_entry(pos, table) - 該函數遍歷所有條目,不包括CPUFREQ_ENTRY_INVALID頻率。 -使用參數 "pos"-一個``cpufreq_frequency_table * `` 作爲循環變量,使用參數 "table"-作爲你想疊代 -的``cpufreq_frequency_table * `` 。 +使用參數"pos" -- 一個 ``cpufreq_frequency_table *`` 作爲循環指針,使用參數"table" -- 作爲你想迭代 +的 ``cpufreq_frequency_table *`` 。 例如:: @@ -251,6 +253,6 @@ cpufreq_for_each_valid_entry(pos, table) - 該函數遍歷所有條目,不包 pos->frequency = ... 
} -如果你需要在driver_freq_table中處理pos的位置,不要減去指針,因爲它的代價相當高。相反,使用宏 +如果你需要在driver_freq_table中處理pos的位置,不要做指針減法,因爲它的代價相當高。作爲替代,使用宏 cpufreq_for_each_entry_idx() 和 cpufreq_for_each_valid_entry_idx() 。 diff --git a/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst b/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst index 49088becd5..01ec8c837f 100644 --- a/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst +++ b/Documentation/translations/zh_TW/cpu-freq/cpufreq-stats.rst @@ -2,18 +2,21 @@ .. include:: ../disclaimer-zh_TW.rst -:Original: :doc:`../../../cpu-freq/cpufreq-stats` -:Translator: Yanteng Si - Hu Haowen +:Original: Documentation/cpu-freq/cpufreq-stats.rst -.. _tw_cpufreq-stats.rst: +:翻譯: + 司延騰 Yanteng Si + +:校譯: + + 唐藝舟 Tang Yizhou ========================================== sysfs CPUFreq Stats的一般說明 ========================================== -用戶信息 +爲使用者準備的信息 作者: Venkatesh Pallipadi @@ -28,17 +31,16 @@ sysfs CPUFreq Stats的一般說明 1. 簡介 =============== -cpufreq-stats是一個爲每個CPU提供CPU頻率統計的驅動。 -這些統計數據在/sysfs中以一堆只讀接口的形式提供。這個接口(在配置好後)將出現在 -/sysfs(/devices/system/cpu/cpuX/cpufreq/stats/)中cpufreq下的一個單 -獨的目錄中,提供給每個CPU。 -各種統計數據將在此目錄下形成只讀文件。 +cpufreq-stats是一種爲每個CPU提供CPU頻率統計的驅動。 +這些統計數據以/sysfs中一系列只讀接口的形式呈現。cpufreq-stats接口(若已配置)將爲每個CPU生成 +/sysfs(/devices/system/cpu/cpuX/cpufreq/stats/)中cpufreq目錄下的stats目錄。 +各項統計數據將在stats目錄下形成對應的只讀文件。 -此驅動是獨立於任何可能運行在你所用CPU上的特定cpufreq_driver而設計的。因此,它將與所有 -cpufreq_driver一起工作。 +此驅動是以獨立於任何可能運行在你所用CPU上的特定cpufreq_driver的方式設計的。因此,它將能和任何 +cpufreq_driver協同工作。 -2. 提供的統計數據(舉例說明) +2. 
已提供的統計數據(有例子) ===================================== cpufreq stats提供了以下統計數據(在下面詳細解釋)。 @@ -47,8 +49,8 @@ cpufreq stats提供了以下統計數據(在下面詳細解釋)。 - total_trans - trans_table -所有的統計數據將從統計驅動被載入的時間(或統計被重置的時間)開始,到某一統計數據被讀取的時間爲止。 -顯然,統計驅動不會有任何關於統計驅動載入之前的頻率轉換信息。 +所有統計數據來自以下時間範圍:從統計驅動被加載的時間(或統計數據被重置的時間)開始,到某一統計數據被讀取的時間爲止。 +顯然,統計驅動不會保存它被加載之前的任何頻率轉換信息。 :: @@ -63,14 +65,14 @@ cpufreq stats提供了以下統計數據(在下面詳細解釋)。 - **reset** -只寫屬性,可用於重置統計計數器。這對於評估不同調節器下的系統行爲非常有用,且無需重啓。 +只寫屬性,可用於重置統計計數器。這對於評估不同調節器的系統行爲非常有用,且無需重啓。 - **time_in_state** -此項給出了這個CPU所支持的每個頻率所花費的時間。cat輸出的每一行都會有" -