From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- Documentation/driver-api/80211/cfg80211.rst | 178 + Documentation/driver-api/80211/index.rst | 17 + Documentation/driver-api/80211/introduction.rst | 17 + .../driver-api/80211/mac80211-advanced.rst | 239 ++ Documentation/driver-api/80211/mac80211.rst | 155 + Documentation/driver-api/acpi/index.rst | 9 + Documentation/driver-api/acpi/linuxized-acpica.rst | 279 ++ Documentation/driver-api/acpi/scan_handlers.rst | 83 + Documentation/driver-api/aperture.rst | 13 + Documentation/driver-api/auxiliary_bus.rst | 50 + .../driver-api/backlight/lp855x-driver.rst | 81 + Documentation/driver-api/basics.rst | 130 + Documentation/driver-api/clk.rst | 312 ++ Documentation/driver-api/component.rst | 19 + Documentation/driver-api/connector.rst | 157 + Documentation/driver-api/console.rst | 152 + Documentation/driver-api/cxl/index.rst | 12 + Documentation/driver-api/cxl/memory-devices.rst | 383 ++ Documentation/driver-api/dcdbas.rst | 99 + Documentation/driver-api/devfreq.rst | 30 + Documentation/driver-api/device-io.rst | 521 +++ Documentation/driver-api/device_link.rst | 320 ++ Documentation/driver-api/dma-buf.rst | 366 ++ Documentation/driver-api/dmaengine/client.rst | 379 ++ Documentation/driver-api/dmaengine/dmatest.rst | 232 ++ Documentation/driver-api/dmaengine/index.rst | 55 + Documentation/driver-api/dmaengine/provider.rst | 647 ++++ Documentation/driver-api/dmaengine/pxa_dma.rst | 190 + Documentation/driver-api/driver-model/binding.rst | 98 + Documentation/driver-api/driver-model/bus.rst | 146 + .../driver-api/driver-model/design-patterns.rst | 116 + Documentation/driver-api/driver-model/device.rst | 120 + Documentation/driver-api/driver-model/devres.rst | 462 +++ Documentation/driver-api/driver-model/driver.rst | 286 ++ Documentation/driver-api/driver-model/index.rst | 23 + Documentation/driver-api/driver-model/overview.rst | 124 + Documentation/driver-api/driver-model/platform.rst | 246 ++ Documentation/driver-api/driver-model/porting.rst | 448 +++ .../driver-api/early-userspace/buffer-format.rst | 119 + .../early-userspace/early_userspace_support.rst | 154 + Documentation/driver-api/early-userspace/index.rst | 18 + Documentation/driver-api/edac.rst | 298 ++ Documentation/driver-api/eisa.rst | 230 ++ Documentation/driver-api/firewire.rst | 48 + Documentation/driver-api/firmware/built-in-fw.rst | 33 + Documentation/driver-api/firmware/core.rst | 17 + .../driver-api/firmware/direct-fs-lookup.rst | 30 + Documentation/driver-api/firmware/efi/index.rst | 11 + .../driver-api/firmware/fallback-mechanisms.rst | 308 ++ .../firmware/firmware-usage-guidelines.rst | 44 + .../driver-api/firmware/firmware_cache.rst | 51 + .../driver-api/firmware/fw_search_path.rst | 31 + Documentation/driver-api/firmware/fw_upload.rst | 127 + Documentation/driver-api/firmware/index.rst | 19 + Documentation/driver-api/firmware/introduction.rst | 27 + Documentation/driver-api/firmware/lookup-order.rst | 20 + .../driver-api/firmware/other_interfaces.rst | 51 + .../driver-api/firmware/request_firmware.rst | 80 + Documentation/driver-api/fpga/fpga-bridge.rst | 22 + Documentation/driver-api/fpga/fpga-mgr.rst | 162 + Documentation/driver-api/fpga/fpga-programming.rst | 107 + Documentation/driver-api/fpga/fpga-region.rst | 109 + Documentation/driver-api/fpga/index.rst | 15 + Documentation/driver-api/fpga/intro.rst | 54 + Documentation/driver-api/frame-buffer.rst | 62 + Documentation/driver-api/generic-counter.rst | 573 +++ Documentation/driver-api/gpio/board.rst | 222 ++ Documentation/driver-api/gpio/bt8xxgpio.rst | 62 + Documentation/driver-api/gpio/consumer.rst | 468 +++ Documentation/driver-api/gpio/driver.rst | 778 +++++ Documentation/driver-api/gpio/drivers-on-gpio.rst | 114 + Documentation/driver-api/gpio/index.rst | 50 + Documentation/driver-api/gpio/intro.rst | 124 + Documentation/driver-api/gpio/legacy.rst | 695 ++++ Documentation/driver-api/gpio/using-gpio.rst | 50 + Documentation/driver-api/hsi.rst | 88 + Documentation/driver-api/hte/hte.rst | 79 + Documentation/driver-api/hte/index.rst | 22 + Documentation/driver-api/hte/tegra-hte.rst | 47 + Documentation/driver-api/i2c.rst | 48 + Documentation/driver-api/i3c/device-driver-api.rst | 9 + Documentation/driver-api/i3c/index.rst | 11 + Documentation/driver-api/i3c/master-driver-api.rst | 9 + Documentation/driver-api/i3c/protocol.rst | 203 ++ Documentation/driver-api/iio/buffers.rst | 126 + Documentation/driver-api/iio/core.rst | 182 + Documentation/driver-api/iio/hw-consumer.rst | 50 + Documentation/driver-api/iio/index.rst | 18 + Documentation/driver-api/iio/intro.rst | 33 + Documentation/driver-api/iio/triggered-buffers.rst | 69 + Documentation/driver-api/iio/triggers.rst | 78 + Documentation/driver-api/index.rst | 123 + Documentation/driver-api/infiniband.rst | 124 + Documentation/driver-api/infrastructure.rst | 97 + Documentation/driver-api/input.rst | 42 + Documentation/driver-api/interconnect.rst | 140 + Documentation/driver-api/io-mapping.rst | 91 + Documentation/driver-api/io_ordering.rst | 51 + Documentation/driver-api/ioctl.rst | 253 ++ Documentation/driver-api/ipmb.rst | 109 + Documentation/driver-api/ipmi.rst | 810 +++++ Documentation/driver-api/isa.rst | 122 + Documentation/driver-api/isapnp.rst | 15 + Documentation/driver-api/libata.rst | 993 ++++++ Documentation/driver-api/mailbox.rst | 129 + Documentation/driver-api/md/index.rst | 12 + Documentation/driver-api/md/md-cluster.rst | 385 ++ Documentation/driver-api/md/raid5-cache.rst | 111 + Documentation/driver-api/md/raid5-ppl.rst | 47 + Documentation/driver-api/media/camera-sensor.rst | 175 + Documentation/driver-api/media/cec-core.rst | 502 +++ .../driver-api/media/drivers/bttv-devel.rst | 116 + .../driver-api/media/drivers/ccs/ccs-regs.asc | 1041 ++++++ Documentation/driver-api/media/drivers/ccs/ccs.rst | 117 + .../driver-api/media/drivers/ccs/mk-ccs-regs | 434 +++ .../driver-api/media/drivers/contributors.rst | 131 + .../driver-api/media/drivers/cx2341x-devel.rst | 3685 ++++++++++++++++++++ .../driver-api/media/drivers/cx88-devel.rst | 113 + Documentation/driver-api/media/drivers/dvb-usb.rst | 357 ++ .../driver-api/media/drivers/fimc-devel.rst | 33 + .../driver-api/media/drivers/frontends.rst | 32 + Documentation/driver-api/media/drivers/index.rst | 40 + Documentation/driver-api/media/drivers/pvrusb2.rst | 202 ++ .../driver-api/media/drivers/pxa_camera.rst | 194 ++ .../driver-api/media/drivers/radiotrack.rst | 168 + Documentation/driver-api/media/drivers/rkisp1.rst | 43 + .../driver-api/media/drivers/saa7134-devel.rst | 67 + .../media/drivers/sh_mobile_ceu_camera.rst | 142 + Documentation/driver-api/media/drivers/tuners.rst | 133 + Documentation/driver-api/media/drivers/vidtv.rst | 513 +++ .../driver-api/media/drivers/vimc-devel.rst | 15 + Documentation/driver-api/media/drivers/zoran.rst | 575 +++ Documentation/driver-api/media/dtv-ca.rst | 6 + Documentation/driver-api/media/dtv-common.rst | 53 + Documentation/driver-api/media/dtv-core.rst | 39 + Documentation/driver-api/media/dtv-demux.rst | 84 + Documentation/driver-api/media/dtv-frontend.rst | 445 +++ Documentation/driver-api/media/dtv-net.rst | 6 + Documentation/driver-api/media/index.rst | 59 + .../driver-api/media/maintainer-entry-profile.rst | 206 ++ Documentation/driver-api/media/mc-core.rst | 326 ++ Documentation/driver-api/media/rc-core.rst | 88 + Documentation/driver-api/media/tx-rx.rst | 133 + Documentation/driver-api/media/v4l2-async.rst | 5 + Documentation/driver-api/media/v4l2-cci.rst | 5 + Documentation/driver-api/media/v4l2-common.rst | 8 + Documentation/driver-api/media/v4l2-controls.rst | 823 +++++ Documentation/driver-api/media/v4l2-core.rst | 29 + Documentation/driver-api/media/v4l2-dev.rst | 375 ++ Documentation/driver-api/media/v4l2-device.rst | 146 + Documentation/driver-api/media/v4l2-dv-timings.rst | 6 + Documentation/driver-api/media/v4l2-event.rst | 181 + Documentation/driver-api/media/v4l2-fh.rst | 141 + .../driver-api/media/v4l2-flash-led-class.rst | 6 + Documentation/driver-api/media/v4l2-fwnode.rst | 5 + Documentation/driver-api/media/v4l2-intro.rst | 76 + Documentation/driver-api/media/v4l2-mc.rst | 6 + Documentation/driver-api/media/v4l2-mediabus.rst | 6 + Documentation/driver-api/media/v4l2-mem2mem.rst | 6 + Documentation/driver-api/media/v4l2-rect.rst | 6 + Documentation/driver-api/media/v4l2-subdev.rst | 637 ++++ Documentation/driver-api/media/v4l2-tuner.rst | 8 + Documentation/driver-api/media/v4l2-tveeprom.rst | 6 + Documentation/driver-api/media/v4l2-videobuf.rst | 403 +++ Documentation/driver-api/media/v4l2-videobuf2.rst | 12 + Documentation/driver-api/mei/hdcp.rst | 32 + Documentation/driver-api/mei/iamt.rst | 101 + Documentation/driver-api/mei/index.rst | 23 + Documentation/driver-api/mei/mei-client-bus.rst | 168 + Documentation/driver-api/mei/mei.rst | 213 ++ Documentation/driver-api/mei/nfc.rst | 28 + Documentation/driver-api/memory-devices/index.rst | 18 + .../driver-api/memory-devices/ti-emif.rst | 64 + .../driver-api/memory-devices/ti-gpmc.rst | 179 + Documentation/driver-api/men-chameleon-bus.rst | 187 + Documentation/driver-api/message-based.rst | 12 + Documentation/driver-api/misc_devices.rst | 5 + Documentation/driver-api/miscellaneous.rst | 48 + Documentation/driver-api/mmc/index.rst | 13 + Documentation/driver-api/mmc/mmc-async-req.rst | 98 + Documentation/driver-api/mmc/mmc-dev-attrs.rst | 91 + Documentation/driver-api/mmc/mmc-dev-parts.rst | 41 + Documentation/driver-api/mmc/mmc-tools.rst | 37 + Documentation/driver-api/mtd/index.rst | 12 + Documentation/driver-api/mtd/nand_ecc.rst | 763 ++++ Documentation/driver-api/mtd/spi-intel.rst | 90 + Documentation/driver-api/mtd/spi-nor.rst | 65 + Documentation/driver-api/mtdnand.rst | 1006 ++++++ Documentation/driver-api/nfc/index.rst | 11 + Documentation/driver-api/nfc/nfc-hci.rst | 311 ++ Documentation/driver-api/nfc/nfc-pn544.rst | 34 + Documentation/driver-api/ntb.rst | 263 ++ Documentation/driver-api/nvdimm/btt.rst | 285 ++ .../driver-api/nvdimm/firmware-activate.rst | 86 + Documentation/driver-api/nvdimm/index.rst | 13 + Documentation/driver-api/nvdimm/nvdimm.rst | 657 ++++ Documentation/driver-api/nvdimm/security.rst | 143 + Documentation/driver-api/nvmem.rst | 202 ++ Documentation/driver-api/parport-lowlevel.rst | 1832 ++++++++++ Documentation/driver-api/pci/index.rst | 22 + Documentation/driver-api/pci/p2pdma.rst | 131 + Documentation/driver-api/pci/pci.rst | 47 + Documentation/driver-api/phy/index.rst | 18 + Documentation/driver-api/phy/phy.rst | 224 ++ Documentation/driver-api/phy/samsung-usb2.rst | 137 + Documentation/driver-api/pin-control.rst | 1455 ++++++++ Documentation/driver-api/pldmfw/driver-ops.rst | 56 + Documentation/driver-api/pldmfw/file-format.rst | 203 ++ Documentation/driver-api/pldmfw/index.rst | 72 + Documentation/driver-api/pm/cpuidle.rst | 279 ++ Documentation/driver-api/pm/devices.rst | 880 +++++ Documentation/driver-api/pm/index.rst | 19 + Documentation/driver-api/pm/notifiers.rst | 74 + Documentation/driver-api/pm/types.rst | 7 + Documentation/driver-api/pps.rst | 242 ++ Documentation/driver-api/ptp.rst | 137 + Documentation/driver-api/pwm.rst | 176 + Documentation/driver-api/rapidio/index.rst | 15 + Documentation/driver-api/rapidio/mport_cdev.rst | 110 + Documentation/driver-api/rapidio/rapidio.rst | 362 ++ Documentation/driver-api/rapidio/rio_cm.rst | 135 + Documentation/driver-api/rapidio/sysfs.rst | 7 + Documentation/driver-api/rapidio/tsi721.rst | 112 + Documentation/driver-api/regulator.rst | 170 + Documentation/driver-api/reset.rst | 221 ++ Documentation/driver-api/rfkill.rst | 132 + Documentation/driver-api/s390-drivers.rst | 135 + Documentation/driver-api/scsi.rst | 338 ++ Documentation/driver-api/serial/driver.rst | 106 + Documentation/driver-api/serial/index.rst | 27 + Documentation/driver-api/serial/serial-iso7816.rst | 90 + Documentation/driver-api/serial/serial-rs485.rst | 135 + Documentation/driver-api/slimbus.rst | 132 + Documentation/driver-api/sm501.rst | 74 + .../driver-api/soundwire/error_handling.rst | 65 + Documentation/driver-api/soundwire/index.rst | 18 + Documentation/driver-api/soundwire/locking.rst | 108 + Documentation/driver-api/soundwire/stream.rst | 527 +++ Documentation/driver-api/soundwire/summary.rst | 208 ++ Documentation/driver-api/spi.rst | 53 + .../driver-api/surface_aggregator/client-api.rst | 38 + .../driver-api/surface_aggregator/client.rst | 397 +++ .../driver-api/surface_aggregator/clients/cdev.rst | 204 ++ .../driver-api/surface_aggregator/clients/dtx.rst | 718 ++++ .../surface_aggregator/clients/index.rst | 23 + .../driver-api/surface_aggregator/clients/san.rst | 44 + .../driver-api/surface_aggregator/index.rst | 21 + .../driver-api/surface_aggregator/internal-api.rst | 67 + .../driver-api/surface_aggregator/internal.rst | 578 +++ .../driver-api/surface_aggregator/overview.rst | 79 + .../driver-api/surface_aggregator/ssh.rst | 346 ++ Documentation/driver-api/switchtec.rst | 102 + Documentation/driver-api/sync_file.rst | 86 + Documentation/driver-api/target.rst | 52 + .../driver-api/thermal/cpu-cooling-api.rst | 107 + .../driver-api/thermal/cpu-idle-cooling.rst | 199 ++ .../driver-api/thermal/exynos_thermal.rst | 90 + .../thermal/exynos_thermal_emulation.rst | 61 + Documentation/driver-api/thermal/index.rst | 19 + Documentation/driver-api/thermal/intel_dptf.rst | 317 ++ .../driver-api/thermal/nouveau_thermal.rst | 96 + .../driver-api/thermal/power_allocator.rst | 281 ++ Documentation/driver-api/thermal/sysfs-api.rst | 495 +++ .../thermal/x86_pkg_temperature_thermal.rst | 55 + Documentation/driver-api/tty/index.rst | 73 + Documentation/driver-api/tty/moxa-smartio.rst | 197 ++ Documentation/driver-api/tty/n_gsm.rst | 192 + Documentation/driver-api/tty/n_tty.rst | 22 + Documentation/driver-api/tty/tty_buffer.rst | 49 + Documentation/driver-api/tty/tty_driver.rst | 128 + Documentation/driver-api/tty/tty_internals.rst | 31 + Documentation/driver-api/tty/tty_ldisc.rst | 85 + Documentation/driver-api/tty/tty_port.rst | 70 + Documentation/driver-api/tty/tty_struct.rst | 81 + Documentation/driver-api/uio-howto.rst | 730 ++++ Documentation/driver-api/usb/URB.rst | 290 ++ Documentation/driver-api/usb/anchors.rst | 83 + Documentation/driver-api/usb/bulk-streams.rst | 83 + Documentation/driver-api/usb/callbacks.rst | 157 + Documentation/driver-api/usb/dma.rst | 136 + Documentation/driver-api/usb/dwc3.rst | 711 ++++ Documentation/driver-api/usb/error-codes.rst | 210 ++ Documentation/driver-api/usb/gadget.rst | 510 +++ Documentation/driver-api/usb/hotplug.rst | 154 + Documentation/driver-api/usb/index.rst | 30 + Documentation/driver-api/usb/persist.rst | 171 + Documentation/driver-api/usb/power-management.rst | 798 +++++ Documentation/driver-api/usb/typec.rst | 234 ++ Documentation/driver-api/usb/typec_bus.rst | 122 + Documentation/driver-api/usb/usb.rst | 1056 ++++++ Documentation/driver-api/usb/usb3-debug-port.rst | 152 + .../driver-api/usb/writing_musb_glue_layer.rst | 720 ++++ .../driver-api/usb/writing_usb_driver.rst | 328 ++ Documentation/driver-api/vfio-mediated-device.rst | 273 ++ .../vfio-pci-device-specific-driver-acceptance.rst | 35 + Documentation/driver-api/vfio.rst | 707 ++++ Documentation/driver-api/virtio/index.rst | 11 + Documentation/driver-api/virtio/virtio.rst | 145 + .../driver-api/virtio/writing_virtio_drivers.rst | 197 ++ Documentation/driver-api/vme.rst | 297 ++ Documentation/driver-api/w1.rst | 67 + Documentation/driver-api/wmi.rst | 21 + Documentation/driver-api/xilinx/eemi.rst | 40 + Documentation/driver-api/xilinx/index.rst | 16 + Documentation/driver-api/xillybus.rst | 379 ++ Documentation/driver-api/zorro.rst | 104 + 306 files changed, 59432 insertions(+) create mode 100644 Documentation/driver-api/80211/cfg80211.rst create mode 100644 Documentation/driver-api/80211/index.rst create mode 100644 Documentation/driver-api/80211/introduction.rst create mode 100644 Documentation/driver-api/80211/mac80211-advanced.rst create mode 100644 Documentation/driver-api/80211/mac80211.rst create mode 100644 Documentation/driver-api/acpi/index.rst create mode 100644 Documentation/driver-api/acpi/linuxized-acpica.rst create mode 100644 Documentation/driver-api/acpi/scan_handlers.rst create mode 100644 Documentation/driver-api/aperture.rst create mode 100644 Documentation/driver-api/auxiliary_bus.rst create mode 100644 Documentation/driver-api/backlight/lp855x-driver.rst create mode 100644 Documentation/driver-api/basics.rst create mode 100644 Documentation/driver-api/clk.rst create mode 100644 Documentation/driver-api/component.rst create mode 100644 Documentation/driver-api/connector.rst create mode 100644 Documentation/driver-api/console.rst create mode 100644 Documentation/driver-api/cxl/index.rst create mode 100644 Documentation/driver-api/cxl/memory-devices.rst create mode 100644 Documentation/driver-api/dcdbas.rst create mode 100644 Documentation/driver-api/devfreq.rst create mode 100644 Documentation/driver-api/device-io.rst create mode 100644 Documentation/driver-api/device_link.rst create mode 100644 Documentation/driver-api/dma-buf.rst create mode 100644 Documentation/driver-api/dmaengine/client.rst create mode 100644 Documentation/driver-api/dmaengine/dmatest.rst create mode 100644 Documentation/driver-api/dmaengine/index.rst create mode 100644 Documentation/driver-api/dmaengine/provider.rst create mode 100644 Documentation/driver-api/dmaengine/pxa_dma.rst create mode 100644 Documentation/driver-api/driver-model/binding.rst create mode 100644 Documentation/driver-api/driver-model/bus.rst create mode 100644 Documentation/driver-api/driver-model/design-patterns.rst create mode 100644 Documentation/driver-api/driver-model/device.rst create mode 100644 Documentation/driver-api/driver-model/devres.rst create mode 100644 Documentation/driver-api/driver-model/driver.rst create mode 100644 Documentation/driver-api/driver-model/index.rst create mode 100644 Documentation/driver-api/driver-model/overview.rst create mode 100644 Documentation/driver-api/driver-model/platform.rst create mode 100644 Documentation/driver-api/driver-model/porting.rst create mode 100644 Documentation/driver-api/early-userspace/buffer-format.rst create mode 100644 Documentation/driver-api/early-userspace/early_userspace_support.rst create mode 100644 Documentation/driver-api/early-userspace/index.rst create mode 100644 Documentation/driver-api/edac.rst create mode 100644 Documentation/driver-api/eisa.rst create mode 100644 Documentation/driver-api/firewire.rst create mode 100644 Documentation/driver-api/firmware/built-in-fw.rst create mode 100644 Documentation/driver-api/firmware/core.rst create mode 100644 Documentation/driver-api/firmware/direct-fs-lookup.rst create mode 100644 Documentation/driver-api/firmware/efi/index.rst create mode 100644 Documentation/driver-api/firmware/fallback-mechanisms.rst create mode 100644 Documentation/driver-api/firmware/firmware-usage-guidelines.rst create mode 100644 Documentation/driver-api/firmware/firmware_cache.rst create mode 100644 Documentation/driver-api/firmware/fw_search_path.rst create mode 100644 Documentation/driver-api/firmware/fw_upload.rst create mode 100644 Documentation/driver-api/firmware/index.rst create mode 100644 Documentation/driver-api/firmware/introduction.rst create mode 100644 Documentation/driver-api/firmware/lookup-order.rst create mode 100644 Documentation/driver-api/firmware/other_interfaces.rst create mode 100644 Documentation/driver-api/firmware/request_firmware.rst create mode 100644 Documentation/driver-api/fpga/fpga-bridge.rst create mode 100644 Documentation/driver-api/fpga/fpga-mgr.rst create mode 100644 Documentation/driver-api/fpga/fpga-programming.rst create mode 100644 Documentation/driver-api/fpga/fpga-region.rst create mode 100644 Documentation/driver-api/fpga/index.rst create mode 100644 Documentation/driver-api/fpga/intro.rst create mode 100644 Documentation/driver-api/frame-buffer.rst create mode 100644 Documentation/driver-api/generic-counter.rst create mode 100644 Documentation/driver-api/gpio/board.rst create mode 100644 Documentation/driver-api/gpio/bt8xxgpio.rst create mode 100644 Documentation/driver-api/gpio/consumer.rst create mode 100644 Documentation/driver-api/gpio/driver.rst create mode 100644 Documentation/driver-api/gpio/drivers-on-gpio.rst create mode 100644 Documentation/driver-api/gpio/index.rst create mode 100644 Documentation/driver-api/gpio/intro.rst create mode 100644 Documentation/driver-api/gpio/legacy.rst create mode 100644 Documentation/driver-api/gpio/using-gpio.rst create mode 100644 Documentation/driver-api/hsi.rst create mode 100644 Documentation/driver-api/hte/hte.rst create mode 100644 Documentation/driver-api/hte/index.rst create mode 100644 Documentation/driver-api/hte/tegra-hte.rst create mode 100644 Documentation/driver-api/i2c.rst create mode 100644 Documentation/driver-api/i3c/device-driver-api.rst create mode 100644 Documentation/driver-api/i3c/index.rst create mode 100644 Documentation/driver-api/i3c/master-driver-api.rst create mode 100644 Documentation/driver-api/i3c/protocol.rst create mode 100644 Documentation/driver-api/iio/buffers.rst create mode 100644 Documentation/driver-api/iio/core.rst create mode 100644 Documentation/driver-api/iio/hw-consumer.rst create mode 100644 Documentation/driver-api/iio/index.rst create mode 100644 Documentation/driver-api/iio/intro.rst create mode 100644 Documentation/driver-api/iio/triggered-buffers.rst create mode 100644 Documentation/driver-api/iio/triggers.rst create mode 100644 Documentation/driver-api/index.rst create mode 100644 Documentation/driver-api/infiniband.rst create mode 100644 Documentation/driver-api/infrastructure.rst create mode 100644 Documentation/driver-api/input.rst create mode 100644 Documentation/driver-api/interconnect.rst create mode 100644 Documentation/driver-api/io-mapping.rst create mode 100644 Documentation/driver-api/io_ordering.rst create mode 100644 Documentation/driver-api/ioctl.rst create mode 100644 Documentation/driver-api/ipmb.rst create mode 100644 Documentation/driver-api/ipmi.rst create mode 100644 Documentation/driver-api/isa.rst create mode 100644 Documentation/driver-api/isapnp.rst create mode 100644 Documentation/driver-api/libata.rst create mode 100644 Documentation/driver-api/mailbox.rst create mode 100644 Documentation/driver-api/md/index.rst create mode 100644 Documentation/driver-api/md/md-cluster.rst create mode 100644 Documentation/driver-api/md/raid5-cache.rst create mode 100644 Documentation/driver-api/md/raid5-ppl.rst create mode 100644 Documentation/driver-api/media/camera-sensor.rst create mode 100644 Documentation/driver-api/media/cec-core.rst create mode 100644 Documentation/driver-api/media/drivers/bttv-devel.rst create mode 100644 Documentation/driver-api/media/drivers/ccs/ccs-regs.asc create mode 100644 Documentation/driver-api/media/drivers/ccs/ccs.rst create mode 100755 Documentation/driver-api/media/drivers/ccs/mk-ccs-regs create mode 100644 Documentation/driver-api/media/drivers/contributors.rst create mode 100644 Documentation/driver-api/media/drivers/cx2341x-devel.rst create mode 100644 Documentation/driver-api/media/drivers/cx88-devel.rst create mode 100644 Documentation/driver-api/media/drivers/dvb-usb.rst create mode 100644 Documentation/driver-api/media/drivers/fimc-devel.rst create mode 100644 Documentation/driver-api/media/drivers/frontends.rst create mode 100644 Documentation/driver-api/media/drivers/index.rst create mode 100644 Documentation/driver-api/media/drivers/pvrusb2.rst create mode 100644 Documentation/driver-api/media/drivers/pxa_camera.rst create mode 100644 Documentation/driver-api/media/drivers/radiotrack.rst create mode 100644 Documentation/driver-api/media/drivers/rkisp1.rst create mode 100644 Documentation/driver-api/media/drivers/saa7134-devel.rst create mode 100644 Documentation/driver-api/media/drivers/sh_mobile_ceu_camera.rst create mode 100644 Documentation/driver-api/media/drivers/tuners.rst create mode 100644 Documentation/driver-api/media/drivers/vidtv.rst create mode 100644 Documentation/driver-api/media/drivers/vimc-devel.rst create mode 100644 Documentation/driver-api/media/drivers/zoran.rst create mode 100644 Documentation/driver-api/media/dtv-ca.rst create mode 100644 Documentation/driver-api/media/dtv-common.rst create mode 100644 Documentation/driver-api/media/dtv-core.rst create mode 100644 Documentation/driver-api/media/dtv-demux.rst create mode 100644 Documentation/driver-api/media/dtv-frontend.rst create mode 100644 Documentation/driver-api/media/dtv-net.rst create mode 100644 Documentation/driver-api/media/index.rst create mode 100644 Documentation/driver-api/media/maintainer-entry-profile.rst create mode 100644 Documentation/driver-api/media/mc-core.rst create mode 100644 Documentation/driver-api/media/rc-core.rst create mode 100644 Documentation/driver-api/media/tx-rx.rst create mode 100644 Documentation/driver-api/media/v4l2-async.rst create mode 100644 Documentation/driver-api/media/v4l2-cci.rst create mode 100644 Documentation/driver-api/media/v4l2-common.rst create mode 100644 Documentation/driver-api/media/v4l2-controls.rst create mode 100644 Documentation/driver-api/media/v4l2-core.rst create mode 100644 Documentation/driver-api/media/v4l2-dev.rst create mode 100644 Documentation/driver-api/media/v4l2-device.rst create mode 100644 Documentation/driver-api/media/v4l2-dv-timings.rst create mode 100644 Documentation/driver-api/media/v4l2-event.rst create mode 100644 Documentation/driver-api/media/v4l2-fh.rst create mode 100644 Documentation/driver-api/media/v4l2-flash-led-class.rst create mode 100644 Documentation/driver-api/media/v4l2-fwnode.rst create mode 100644 Documentation/driver-api/media/v4l2-intro.rst create mode 100644 Documentation/driver-api/media/v4l2-mc.rst create mode 100644 Documentation/driver-api/media/v4l2-mediabus.rst create mode 100644 Documentation/driver-api/media/v4l2-mem2mem.rst create mode 100644 Documentation/driver-api/media/v4l2-rect.rst create mode 100644 Documentation/driver-api/media/v4l2-subdev.rst create mode 100644 Documentation/driver-api/media/v4l2-tuner.rst create mode 100644 Documentation/driver-api/media/v4l2-tveeprom.rst create mode 100644 Documentation/driver-api/media/v4l2-videobuf.rst create mode 100644 Documentation/driver-api/media/v4l2-videobuf2.rst create mode 100644 Documentation/driver-api/mei/hdcp.rst create mode 100644 Documentation/driver-api/mei/iamt.rst create mode 100644 Documentation/driver-api/mei/index.rst create mode 100644 Documentation/driver-api/mei/mei-client-bus.rst create mode 100644 Documentation/driver-api/mei/mei.rst create mode 100644 Documentation/driver-api/mei/nfc.rst create mode 100644 Documentation/driver-api/memory-devices/index.rst create mode 100644 Documentation/driver-api/memory-devices/ti-emif.rst create mode 100644 Documentation/driver-api/memory-devices/ti-gpmc.rst create mode 100644 Documentation/driver-api/men-chameleon-bus.rst create mode 100644 Documentation/driver-api/message-based.rst create mode 100644 Documentation/driver-api/misc_devices.rst create mode 100644 Documentation/driver-api/miscellaneous.rst create mode 100644 Documentation/driver-api/mmc/index.rst create mode 100644 Documentation/driver-api/mmc/mmc-async-req.rst create mode 100644 Documentation/driver-api/mmc/mmc-dev-attrs.rst create mode 100644 Documentation/driver-api/mmc/mmc-dev-parts.rst create mode 100644 Documentation/driver-api/mmc/mmc-tools.rst create mode 100644 Documentation/driver-api/mtd/index.rst create mode 100644 Documentation/driver-api/mtd/nand_ecc.rst create mode 100644 Documentation/driver-api/mtd/spi-intel.rst create mode 100644 Documentation/driver-api/mtd/spi-nor.rst create mode 100644 Documentation/driver-api/mtdnand.rst create mode 100644 Documentation/driver-api/nfc/index.rst create mode 100644 Documentation/driver-api/nfc/nfc-hci.rst create mode 100644 Documentation/driver-api/nfc/nfc-pn544.rst create mode 100644 Documentation/driver-api/ntb.rst create mode 100644 Documentation/driver-api/nvdimm/btt.rst create mode 100644 Documentation/driver-api/nvdimm/firmware-activate.rst create mode 100644 Documentation/driver-api/nvdimm/index.rst create mode 100644 Documentation/driver-api/nvdimm/nvdimm.rst create mode 100644 Documentation/driver-api/nvdimm/security.rst create mode 100644 Documentation/driver-api/nvmem.rst create mode 100644 Documentation/driver-api/parport-lowlevel.rst create mode 100644 Documentation/driver-api/pci/index.rst create mode 100644 Documentation/driver-api/pci/p2pdma.rst create mode 100644 Documentation/driver-api/pci/pci.rst create mode 100644 Documentation/driver-api/phy/index.rst create mode 100644 Documentation/driver-api/phy/phy.rst create mode 100644 Documentation/driver-api/phy/samsung-usb2.rst create mode 100644 Documentation/driver-api/pin-control.rst create mode 100644 Documentation/driver-api/pldmfw/driver-ops.rst create mode 100644 Documentation/driver-api/pldmfw/file-format.rst create mode 100644 Documentation/driver-api/pldmfw/index.rst create mode 100644 Documentation/driver-api/pm/cpuidle.rst create mode 100644 Documentation/driver-api/pm/devices.rst create mode 100644 Documentation/driver-api/pm/index.rst create mode 100644 Documentation/driver-api/pm/notifiers.rst create mode 100644 Documentation/driver-api/pm/types.rst create mode 100644 Documentation/driver-api/pps.rst create mode 100644 Documentation/driver-api/ptp.rst create mode 100644 Documentation/driver-api/pwm.rst create mode 100644 Documentation/driver-api/rapidio/index.rst create mode 100644 Documentation/driver-api/rapidio/mport_cdev.rst create mode 100644 Documentation/driver-api/rapidio/rapidio.rst create mode 100644 Documentation/driver-api/rapidio/rio_cm.rst create mode 100644 Documentation/driver-api/rapidio/sysfs.rst create mode 100644 Documentation/driver-api/rapidio/tsi721.rst create mode 100644 Documentation/driver-api/regulator.rst create mode 100644 Documentation/driver-api/reset.rst create mode 100644 Documentation/driver-api/rfkill.rst create mode 100644 Documentation/driver-api/s390-drivers.rst create mode 100644 Documentation/driver-api/scsi.rst create mode 100644 Documentation/driver-api/serial/driver.rst create mode 100644 Documentation/driver-api/serial/index.rst create mode 100644 Documentation/driver-api/serial/serial-iso7816.rst create mode 100644 Documentation/driver-api/serial/serial-rs485.rst create mode 100644 Documentation/driver-api/slimbus.rst create mode 100644 Documentation/driver-api/sm501.rst create mode 100644 Documentation/driver-api/soundwire/error_handling.rst create mode 100644 Documentation/driver-api/soundwire/index.rst create mode 100644 Documentation/driver-api/soundwire/locking.rst create mode 100644 Documentation/driver-api/soundwire/stream.rst create mode 100644 Documentation/driver-api/soundwire/summary.rst create mode 100644 Documentation/driver-api/spi.rst create mode 100644 Documentation/driver-api/surface_aggregator/client-api.rst create mode 100644 Documentation/driver-api/surface_aggregator/client.rst create mode 100644 Documentation/driver-api/surface_aggregator/clients/cdev.rst create mode 100644 Documentation/driver-api/surface_aggregator/clients/dtx.rst create mode 100644 Documentation/driver-api/surface_aggregator/clients/index.rst create mode 100644 Documentation/driver-api/surface_aggregator/clients/san.rst create mode 100644 Documentation/driver-api/surface_aggregator/index.rst create mode 100644 Documentation/driver-api/surface_aggregator/internal-api.rst create mode 100644 Documentation/driver-api/surface_aggregator/internal.rst create mode 100644 Documentation/driver-api/surface_aggregator/overview.rst create mode 100644 Documentation/driver-api/surface_aggregator/ssh.rst create mode 100644 Documentation/driver-api/switchtec.rst create mode 100644 Documentation/driver-api/sync_file.rst create mode 100644 Documentation/driver-api/target.rst create mode 100644 Documentation/driver-api/thermal/cpu-cooling-api.rst create mode 100644 Documentation/driver-api/thermal/cpu-idle-cooling.rst create mode 100644 Documentation/driver-api/thermal/exynos_thermal.rst create mode 100644 Documentation/driver-api/thermal/exynos_thermal_emulation.rst create mode 100644 Documentation/driver-api/thermal/index.rst create mode 100644 Documentation/driver-api/thermal/intel_dptf.rst create mode 100644 Documentation/driver-api/thermal/nouveau_thermal.rst create mode 100644 Documentation/driver-api/thermal/power_allocator.rst create mode 100644 Documentation/driver-api/thermal/sysfs-api.rst create mode 100644 Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst create mode 100644 Documentation/driver-api/tty/index.rst create mode 100644 Documentation/driver-api/tty/moxa-smartio.rst create mode 100644 Documentation/driver-api/tty/n_gsm.rst create mode 100644 Documentation/driver-api/tty/n_tty.rst create mode 100644 Documentation/driver-api/tty/tty_buffer.rst create mode 100644 Documentation/driver-api/tty/tty_driver.rst create mode 100644 Documentation/driver-api/tty/tty_internals.rst create mode 100644 Documentation/driver-api/tty/tty_ldisc.rst create mode 100644 Documentation/driver-api/tty/tty_port.rst create mode 100644 Documentation/driver-api/tty/tty_struct.rst create mode 100644 Documentation/driver-api/uio-howto.rst create mode 100644 Documentation/driver-api/usb/URB.rst create mode 100644 Documentation/driver-api/usb/anchors.rst create mode 100644 Documentation/driver-api/usb/bulk-streams.rst create mode 100644 Documentation/driver-api/usb/callbacks.rst create mode 100644 Documentation/driver-api/usb/dma.rst create mode 100644 Documentation/driver-api/usb/dwc3.rst create mode 100644 Documentation/driver-api/usb/error-codes.rst create mode 100644 Documentation/driver-api/usb/gadget.rst create mode 100644 Documentation/driver-api/usb/hotplug.rst create mode 100644 Documentation/driver-api/usb/index.rst create mode 100644 Documentation/driver-api/usb/persist.rst create mode 100644 Documentation/driver-api/usb/power-management.rst create mode 100644 Documentation/driver-api/usb/typec.rst create mode 100644 Documentation/driver-api/usb/typec_bus.rst create mode 100644 Documentation/driver-api/usb/usb.rst create mode 100644 Documentation/driver-api/usb/usb3-debug-port.rst create mode 100644 Documentation/driver-api/usb/writing_musb_glue_layer.rst create mode 100644 Documentation/driver-api/usb/writing_usb_driver.rst create mode 100644 Documentation/driver-api/vfio-mediated-device.rst create mode 100644 Documentation/driver-api/vfio-pci-device-specific-driver-acceptance.rst create mode 100644 Documentation/driver-api/vfio.rst create mode 100644 Documentation/driver-api/virtio/index.rst create mode 100644 Documentation/driver-api/virtio/virtio.rst create mode 100644 Documentation/driver-api/virtio/writing_virtio_drivers.rst create mode 100644 Documentation/driver-api/vme.rst create mode 100644 Documentation/driver-api/w1.rst create mode 100644 Documentation/driver-api/wmi.rst create mode 100644 Documentation/driver-api/xilinx/eemi.rst create mode 100644 Documentation/driver-api/xilinx/index.rst create mode 100644 Documentation/driver-api/xillybus.rst create mode 100644 Documentation/driver-api/zorro.rst (limited to 'Documentation/driver-api') diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst new file mode 100644 index 000000000..836f609c3 --- /dev/null +++ b/Documentation/driver-api/80211/cfg80211.rst @@ -0,0 +1,178 @@ +================== +cfg80211 subsystem +================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Introduction + +Device registration +=================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Device registration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_channel_flags + ieee80211_channel + ieee80211_rate_flags + ieee80211_rate + ieee80211_sta_ht_cap + ieee80211_supported_band + cfg80211_signal_type + wiphy_params_flags + wiphy_flags + wiphy + wireless_dev + wiphy_new + wiphy_read_of_freq_limits + wiphy_register + wiphy_unregister + wiphy_free + wiphy_name + wiphy_dev + wiphy_priv + priv_to_wiphy + set_wiphy_dev + wdev_priv + ieee80211_iface_limit + ieee80211_iface_combination + cfg80211_check_combinations + +Actions and configuration +========================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Actions and configuration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_ops + vif_params + key_params + survey_info_flags + survey_info + cfg80211_beacon_data + cfg80211_ap_settings + station_parameters + rate_info_flags + rate_info + station_info + monitor_flags + mpath_info_flags + mpath_info + bss_parameters + ieee80211_txq_params + cfg80211_crypto_settings + cfg80211_auth_request + cfg80211_assoc_request + cfg80211_deauth_request + cfg80211_disassoc_request + cfg80211_ibss_params + cfg80211_connect_params + cfg80211_pmksa + cfg80211_rx_mlme_mgmt + cfg80211_auth_timeout + cfg80211_rx_assoc_resp + cfg80211_assoc_timeout + cfg80211_tx_mlme_mgmt + cfg80211_ibss_joined + cfg80211_connect_resp_params + cfg80211_connect_done + cfg80211_connect_result + cfg80211_connect_bss + cfg80211_connect_timeout + cfg80211_roamed + cfg80211_disconnected + cfg80211_ready_on_channel + cfg80211_remain_on_channel_expired + cfg80211_new_sta + cfg80211_rx_mgmt + cfg80211_mgmt_tx_status + cfg80211_cqm_rssi_notify + cfg80211_cqm_pktloss_notify + cfg80211_michael_mic_failure + +Scanning and BSS list handling +============================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Scanning and BSS list handling + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_ssid + cfg80211_scan_request + cfg80211_scan_done + cfg80211_bss + cfg80211_inform_bss + cfg80211_inform_bss_frame_data + cfg80211_inform_bss_data + cfg80211_unlink_bss + cfg80211_find_ie + ieee80211_bss_get_ie + +Utility functions +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Utility functions + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_channel_to_frequency + ieee80211_frequency_to_channel + ieee80211_get_channel + ieee80211_get_response_rate + ieee80211_hdrlen + ieee80211_get_hdrlen_from_skb + ieee80211_radiotap_iterator + +Data path helpers +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Data path helpers + +.. kernel-doc:: include/net/cfg80211.h + :functions: + ieee80211_data_to_8023 + ieee80211_amsdu_to_8023s + cfg80211_classify8021d + +Regulatory enforcement infrastructure +===================================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Regulatory enforcement infrastructure + +.. kernel-doc:: include/net/cfg80211.h + :functions: + regulatory_hint + wiphy_apply_custom_regulatory + freq_reg_info + +RFkill integration +================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: RFkill integration + +.. kernel-doc:: include/net/cfg80211.h + :functions: + wiphy_rfkill_set_hw_state + wiphy_rfkill_start_polling + wiphy_rfkill_stop_polling + +Test mode +========= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Test mode + +.. kernel-doc:: include/net/cfg80211.h + :functions: + cfg80211_testmode_alloc_reply_skb + cfg80211_testmode_reply + cfg80211_testmode_alloc_event_skb + cfg80211_testmode_event diff --git a/Documentation/driver-api/80211/index.rst b/Documentation/driver-api/80211/index.rst new file mode 100644 index 000000000..af210859d --- /dev/null +++ b/Documentation/driver-api/80211/index.rst @@ -0,0 +1,17 @@ +===================================== +Linux 802.11 Driver Developer's Guide +===================================== + +.. toctree:: + + introduction + cfg80211 + mac80211 + mac80211-advanced + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/80211/introduction.rst b/Documentation/driver-api/80211/introduction.rst new file mode 100644 index 000000000..4938fa876 --- /dev/null +++ b/Documentation/driver-api/80211/introduction.rst @@ -0,0 +1,17 @@ +============ +Introduction +============ + +Explaining wireless 802.11 networking in the Linux kernel + +Copyright 2007-2009 Johannes Berg + +These books attempt to give a description of the various subsystems +that play a role in 802.11 wireless networking in Linux. Since these +books are for kernel developers they attempts to document the +structures and functions used in the kernel as well as giving a +higher-level overview. + +The reader is expected to be familiar with the 802.11 standard as +published by the IEEE in 802.11-2007 (or possibly later versions). +References to this standard will be given as "802.11-2007 8.1.5". diff --git a/Documentation/driver-api/80211/mac80211-advanced.rst b/Documentation/driver-api/80211/mac80211-advanced.rst new file mode 100644 index 000000000..f8df7b3af --- /dev/null +++ b/Documentation/driver-api/80211/mac80211-advanced.rst @@ -0,0 +1,239 @@ +============================= +mac80211 subsystem (advanced) +============================= + +Information contained within this part of the book is of interest only +for advanced interaction of mac80211 with drivers to exploit more +hardware capabilities and improve performance. + +LED support +=========== + +Mac80211 supports various ways of blinking LEDs. Wherever possible, +device LEDs should be exposed as LED class devices and hooked up to the +appropriate trigger, which will then be triggered appropriately by +mac80211. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_get_tx_led_name + ieee80211_get_rx_led_name + ieee80211_get_assoc_led_name + ieee80211_get_radio_led_name + ieee80211_tpt_blink + ieee80211_tpt_led_trigger_flags + ieee80211_create_tpt_led_trigger + +Hardware crypto acceleration +============================ + +.. kernel-doc:: include/net/mac80211.h + :doc: Hardware crypto acceleration + +.. kernel-doc:: include/net/mac80211.h + :functions: + set_key_cmd + ieee80211_key_conf + ieee80211_key_flags + ieee80211_get_tkip_p1k + ieee80211_get_tkip_p1k_iv + ieee80211_get_tkip_p2k + +Powersave support +================= + +.. kernel-doc:: include/net/mac80211.h + :doc: Powersave support + +Beacon filter support +===================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Beacon filter support + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_beacon_loss + +Multiple queues and QoS support +=============================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_queue_params + +Access point mode support +========================= + +TBD + +Some parts of the if_conf should be discussed here instead + +Insert notes about VLAN interfaces with hw crypto here or in the hw +crypto chapter. + +support for powersaving clients +------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: AP support for powersaving clients + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_get_buffered_bc + ieee80211_beacon_get + ieee80211_sta_eosp + ieee80211_frame_release_type + ieee80211_sta_ps_transition + ieee80211_sta_ps_transition_ni + ieee80211_sta_set_buffered + ieee80211_sta_block_awake + +Supporting multiple virtual interfaces +====================================== + +TBD + +Note: WDS with identical MAC address should almost always be OK + +Insert notes about having multiple virtual interfaces with different MAC +addresses here, note which configurations are supported by mac80211, add +notes about supporting hw crypto with it. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_iterate_active_interfaces + ieee80211_iterate_active_interfaces_atomic + +Station handling +================ + +TODO + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_sta + sta_notify_cmd + ieee80211_find_sta + ieee80211_find_sta_by_ifaddr + +Hardware scan offload +===================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_scan_completed + +Aggregation +=========== + +TX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-tx.c + :doc: TX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-tx.c + +RX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-rx.c + :doc: RX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-rx.c + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ampdu_mlme_action + +Spatial Multiplexing Powersave (SMPS) +===================================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Spatial multiplexing power save + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_request_smps + ieee80211_smps_mode + +TBD + +This part of the book describes the rate control algorithm interface and +how it relates to mac80211 and drivers. + +Rate Control API +================ + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_start_tx_ba_session + ieee80211_start_tx_ba_cb_irqsafe + ieee80211_stop_tx_ba_session + ieee80211_stop_tx_ba_cb_irqsafe + ieee80211_rate_control_changed + ieee80211_tx_rate_control + +TBD + +This part of the book describes mac80211 internals. + +Key handling +============ + +Key handling basics +------------------- + +.. kernel-doc:: net/mac80211/key.c + :doc: Key handling basics + +MORE TBD +-------- + +TBD + +Receive processing +================== + +TBD + +Transmit processing +=================== + +TBD + +Station info handling +===================== + +Programming information +----------------------- + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: + sta_info + ieee80211_sta_info_flags + +STA information lifetime rules +------------------------------ + +.. kernel-doc:: net/mac80211/sta_info.c + :doc: STA information lifetime rules + +Aggregation Functions +===================== + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: + sta_ampdu_mlme + tid_ampdu_tx + tid_ampdu_rx + +Synchronisation Functions +========================= + +TBD + +Locking, lots of RCU diff --git a/Documentation/driver-api/80211/mac80211.rst b/Documentation/driver-api/80211/mac80211.rst new file mode 100644 index 000000000..67d2e58b4 --- /dev/null +++ b/Documentation/driver-api/80211/mac80211.rst @@ -0,0 +1,155 @@ +=========================== +mac80211 subsystem (basics) +=========================== + +You should read and understand the information contained within this +part of the book while implementing a mac80211 driver. In some chapters, +advanced usage is noted, those may be skipped if this isn't needed. + +This part of the book only covers station and monitor mode +functionality, additional information required to implement the other +modes is covered in the second part of the book. + +Basic hardware handling +======================= + +TBD + +This chapter shall contain information on getting a hw struct allocated +and registered with mac80211. + +Since it is required to allocate rates/modes before registering a hw +struct, this chapter shall also contain information on setting up the +rate/mode structs. + +Additionally, some discussion about the callbacks and the general +programming model should be in here, including the definition of +ieee80211_ops which will be referred to a lot. + +Finally, a discussion of hardware capabilities should be done with +references to other parts of the book. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_hw + ieee80211_hw_flags + SET_IEEE80211_DEV + SET_IEEE80211_PERM_ADDR + ieee80211_ops + ieee80211_alloc_hw + ieee80211_register_hw + ieee80211_unregister_hw + ieee80211_free_hw + +PHY configuration +================= + +TBD + +This chapter should describe PHY handling including start/stop callbacks +and the various structures used. + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_conf + ieee80211_conf_flags + +Virtual interfaces +================== + +TBD + +This chapter should describe virtual interface basics that are relevant +to the driver (VLANs, MGMT etc are not.) It should explain the use of +the add_iface/remove_iface callbacks as well as the interface +configuration callbacks. + +Things related to AP mode should be discussed there. + +Things related to supporting multiple interfaces should be in the +appropriate chapter, a BIG FAT note should be here about this though and +the recommendation to allow only a single interface in STA mode at +first! + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_vif + +Receive and transmit processing +=============================== + +what should be here +------------------- + +TBD + +This should describe the receive and transmit paths in mac80211/the +drivers as well as transmit status handling. + +Frame format +------------ + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame format + +Packet alignment +---------------- + +.. kernel-doc:: net/mac80211/rx.c + :doc: Packet alignment + +Calling into mac80211 from interrupts +------------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: Calling mac80211 from interrupts + +functions/definitions +--------------------- + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_rx_status + mac80211_rx_encoding_flags + mac80211_rx_flags + mac80211_tx_info_flags + mac80211_tx_control_flags + mac80211_rate_control_flags + ieee80211_tx_rate + ieee80211_tx_info + ieee80211_tx_info_clear_status + ieee80211_rx + ieee80211_rx_ni + ieee80211_rx_irqsafe + ieee80211_tx_status + ieee80211_tx_status_ni + ieee80211_tx_status_irqsafe + ieee80211_rts_get + ieee80211_rts_duration + ieee80211_ctstoself_get + ieee80211_ctstoself_duration + ieee80211_generic_frame_duration + ieee80211_wake_queue + ieee80211_stop_queue + ieee80211_wake_queues + ieee80211_stop_queues + ieee80211_queue_stopped + +Frame filtering +=============== + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame filtering + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_filter_flags + +The mac80211 workqueue +====================== + +.. kernel-doc:: include/net/mac80211.h + :doc: mac80211 workqueue + +.. kernel-doc:: include/net/mac80211.h + :functions: + ieee80211_queue_work + ieee80211_queue_delayed_work diff --git a/Documentation/driver-api/acpi/index.rst b/Documentation/driver-api/acpi/index.rst new file mode 100644 index 000000000..ace0008e5 --- /dev/null +++ b/Documentation/driver-api/acpi/index.rst @@ -0,0 +1,9 @@ +============ +ACPI Support +============ + +.. toctree:: + :maxdepth: 2 + + linuxized-acpica + scan_handlers diff --git a/Documentation/driver-api/acpi/linuxized-acpica.rst b/Documentation/driver-api/acpi/linuxized-acpica.rst new file mode 100644 index 000000000..cc234353d --- /dev/null +++ b/Documentation/driver-api/acpi/linuxized-acpica.rst @@ -0,0 +1,279 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +============================================================ +Linuxized ACPICA - Introduction to ACPICA Release Automation +============================================================ + +:Copyright: |copy| 2013-2016, Intel Corporation + +:Author: Lv Zheng + + +Abstract +======== +This document describes the ACPICA project and the relationship between +ACPICA and Linux. It also describes how ACPICA code in drivers/acpi/acpica, +include/acpi and tools/power/acpi is automatically updated to follow the +upstream. + +ACPICA Project +============== + +The ACPI Component Architecture (ACPICA) project provides an operating +system (OS)-independent reference implementation of the Advanced +Configuration and Power Interface Specification (ACPI). It has been +adapted by various host OSes. By directly integrating ACPICA, Linux can +also benefit from the application experiences of ACPICA from other host +OSes. + +The homepage of ACPICA project is: www.acpica.org, it is maintained and +supported by Intel Corporation. + +The following figure depicts the Linux ACPI subsystem where the ACPICA +adaptation is included:: + + +---------------------------------------------------------+ + | | + | +---------------------------------------------------+ | + | | +------------------+ | | + | | | Table Management | | | + | | +------------------+ | | + | | +----------------------+ | | + | | | Namespace Management | | | + | | +----------------------+ | | + | | +------------------+ ACPICA Components | | + | | | Event Management | | | + | | +------------------+ | | + | | +---------------------+ | | + | | | Resource Management | | | + | | +---------------------+ | | + | | +---------------------+ | | + | | | Hardware Management | | | + | | +---------------------+ | | + | +---------------------------------------------------+ | | + | | | +------------------+ | | | + | | | | OS Service Layer | | | | + | | | +------------------+ | | | + | | +-------------------------------------------------|-+ | + | | +--------------------+ | | + | | | Device Enumeration | | | + | | +--------------------+ | | + | | +------------------+ | | + | | | Power Management | | | + | | +------------------+ Linux/ACPI Components | | + | | +--------------------+ | | + | | | Thermal Management | | | + | | +--------------------+ | | + | | +--------------------------+ | | + | | | Drivers for ACPI Devices | | | + | | +--------------------------+ | | + | | +--------+ | | + | | | ...... | | | + | | +--------+ | | + | +---------------------------------------------------+ | + | | + +---------------------------------------------------------+ + + Figure 1. Linux ACPI Software Components + +.. note:: + A. OS Service Layer - Provided by Linux to offer OS dependent + implementation of the predefined ACPICA interfaces (acpi_os_*). + :: + + include/acpi/acpiosxf.h + drivers/acpi/osl.c + include/acpi/platform + include/asm/acenv.h + B. ACPICA Functionality - Released from ACPICA code base to offer + OS independent implementation of the ACPICA interfaces (acpi_*). + :: + + drivers/acpi/acpica + include/acpi/ac*.h + tools/power/acpi + C. Linux/ACPI Functionality - Providing Linux specific ACPI + functionality to the other Linux kernel subsystems and user space + programs. + :: + + drivers/acpi + include/linux/acpi.h + include/linux/acpi*.h + include/acpi + tools/power/acpi + D. Architecture Specific ACPICA/ACPI Functionalities - Provided by the + ACPI subsystem to offer architecture specific implementation of the + ACPI interfaces. They are Linux specific components and are out of + the scope of this document. + :: + + include/asm/acpi.h + include/asm/acpi*.h + arch/*/acpi + +ACPICA Release +============== + +The ACPICA project maintains its code base at the following repository URL: +https://github.com/acpica/acpica.git. As a rule, a release is made every +month. + +As the coding style adopted by the ACPICA project is not acceptable by +Linux, there is a release process to convert the ACPICA git commits into +Linux patches. The patches generated by this process are referred to as +"linuxized ACPICA patches". The release process is carried out on a local +copy the ACPICA git repository. Each commit in the monthly release is +converted into a linuxized ACPICA patch. Together, they form the monthly +ACPICA release patchset for the Linux ACPI community. This process is +illustrated in the following figure:: + + +-----------------------------+ + | acpica / master (-) commits | + +-----------------------------+ + /|\ | + | \|/ + | /---------------------\ +----------------------+ + | < Linuxize repo Utility >-->| old linuxized acpica |--+ + | \---------------------/ +----------------------+ | + | | + /---------\ | + < git reset > \ + \---------/ \ + /|\ /+-+ + | / | + +-----------------------------+ | | + | acpica / master (+) commits | | | + +-----------------------------+ | | + | | | + \|/ | | + /-----------------------\ +----------------------+ | | + < Linuxize repo Utilities >-->| new linuxized acpica |--+ | + \-----------------------/ +----------------------+ | + \|/ + +--------------------------+ /----------------------\ + | Linuxized ACPICA Patches |<----------------< Linuxize patch Utility > + +--------------------------+ \----------------------/ + | + \|/ + /---------------------------\ + < Linux ACPI Community Review > + \---------------------------/ + | + \|/ + +-----------------------+ /------------------\ +----------------+ + | linux-pm / linux-next |-->< Linux Merge Window >-->| linux / master | + +-----------------------+ \------------------/ +----------------+ + + Figure 2. ACPICA -> Linux Upstream Process + +.. note:: + A. Linuxize Utilities - Provided by the ACPICA repository, including a + utility located in source/tools/acpisrc folder and a number of + scripts located in generate/linux folder. + B. acpica / master - "master" branch of the git repository at + . + C. linux-pm / linux-next - "linux-next" branch of the git repository at + . + D. linux / master - "master" branch of the git repository at + . + + Before the linuxized ACPICA patches are sent to the Linux ACPI community + for review, there is a quality assurance build test process to reduce + porting issues. Currently this build process only takes care of the + following kernel configuration options: + CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER + +ACPICA Divergences +================== + +Ideally, all of the ACPICA commits should be converted into Linux patches +automatically without manual modifications, the "linux / master" tree should +contain the ACPICA code that exactly corresponds to the ACPICA code +contained in "new linuxized acpica" tree and it should be possible to run +the release process fully automatically. + +As a matter of fact, however, there are source code differences between +the ACPICA code in Linux and the upstream ACPICA code, referred to as +"ACPICA Divergences". + +The various sources of ACPICA divergences include: + 1. Legacy divergences - Before the current ACPICA release process was + established, there already had been divergences between Linux and + ACPICA. Over the past several years those divergences have been greatly + reduced, but there still are several ones and it takes time to figure + out the underlying reasons for their existence. + 2. Manual modifications - Any manual modification (eg. coding style fixes) + made directly in the Linux sources obviously hurts the ACPICA release + automation. Thus it is recommended to fix such issues in the ACPICA + upstream source code and generate the linuxized fix using the ACPICA + release utilities (please refer to Section 4 below for the details). + 3. Linux specific features - Sometimes it's impossible to use the + current ACPICA APIs to implement features required by the Linux kernel, + so Linux developers occasionally have to change ACPICA code directly. + Those changes may not be acceptable by ACPICA upstream and in such cases + they are left as committed ACPICA divergences unless the ACPICA side can + implement new mechanisms as replacements for them. + 4. ACPICA release fixups - ACPICA only tests commits using a set of the + user space simulation utilities, thus the linuxized ACPICA patches may + break the Linux kernel, leaving us build/boot failures. In order to + avoid breaking Linux bisection, fixes are applied directly to the + linuxized ACPICA patches during the release process. When the release + fixups are backported to the upstream ACPICA sources, they must follow + the upstream ACPICA rules and so further modifications may appear. + That may result in the appearance of new divergences. + 5. Fast tracking of ACPICA commits - Some ACPICA commits are regression + fixes or stable-candidate material, so they are applied in advance with + respect to the ACPICA release process. If such commits are reverted or + rebased on the ACPICA side in order to offer better solutions, new ACPICA + divergences are generated. + +ACPICA Development +================== + +This paragraph guides Linux developers to use the ACPICA upstream release +utilities to obtain Linux patches corresponding to upstream ACPICA commits +before they become available from the ACPICA release process. + + 1. Cherry-pick an ACPICA commit + + First you need to git clone the ACPICA repository and the ACPICA change + you want to cherry pick must be committed into the local repository. + + Then the gen-patch.sh command can help to cherry-pick an ACPICA commit + from the ACPICA local repository:: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/gen-patch.sh -u [commit ID] + + Here the commit ID is the ACPICA local repository commit ID you want to + cherry pick. It can be omitted if the commit is "HEAD". + + 2. Cherry-pick recent ACPICA commits + + Sometimes you need to rebase your code on top of the most recent ACPICA + changes that haven't been applied to Linux yet. + + You can generate the ACPICA release series yourself and rebase your code on + top of the generated ACPICA release patches:: + + $ git clone https://github.com/acpica/acpica + $ cd acpica + $ generate/linux/make-patches.sh -u [commit ID] + + The commit ID should be the last ACPICA commit accepted by Linux. Usually, + it is the commit modifying ACPI_CA_VERSION. It can be found by executing + "git blame source/include/acpixf.h" and referencing the line that contains + "ACPI_CA_VERSION". + + 3. Inspect the current divergences + + If you have local copies of both Linux and upstream ACPICA, you can generate + a diff file indicating the state of the current divergences:: + + # git clone https://github.com/acpica/acpica + # git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + # cd acpica + # generate/linux/divergence.sh -s ../linux diff --git a/Documentation/driver-api/acpi/scan_handlers.rst b/Documentation/driver-api/acpi/scan_handlers.rst new file mode 100644 index 000000000..7a197b3a3 --- /dev/null +++ b/Documentation/driver-api/acpi/scan_handlers.rst @@ -0,0 +1,83 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +================== +ACPI Scan Handlers +================== + +:Copyright: |copy| 2012, Intel Corporation + +:Author: Rafael J. Wysocki + +During system initialization and ACPI-based device hot-add, the ACPI namespace +is scanned in search of device objects that generally represent various pieces +of hardware. This causes a struct acpi_device object to be created and +registered with the driver core for every device object in the ACPI namespace +and the hierarchy of those struct acpi_device objects reflects the namespace +layout (i.e. parent device objects in the namespace are represented by parent +struct acpi_device objects and analogously for their children). Those struct +acpi_device objects are referred to as "device nodes" in what follows, but they +should not be confused with struct device_node objects used by the Device Trees +parsing code (although their role is analogous to the role of those objects). + +During ACPI-based device hot-remove device nodes representing pieces of hardware +being removed are unregistered and deleted. + +The core ACPI namespace scanning code in drivers/acpi/scan.c carries out basic +initialization of device nodes, such as retrieving common configuration +information from the device objects represented by them and populating them with +appropriate data, but some of them require additional handling after they have +been registered. For example, if the given device node represents a PCI host +bridge, its registration should cause the PCI bus under that bridge to be +enumerated and PCI devices on that bus to be registered with the driver core. +Similarly, if the device node represents a PCI interrupt link, it is necessary +to configure that link so that the kernel can use it. + +Those additional configuration tasks usually depend on the type of the hardware +component represented by the given device node which can be determined on the +basis of the device node's hardware ID (HID). They are performed by objects +called ACPI scan handlers represented by the following structure:: + + struct acpi_scan_handler { + const struct acpi_device_id *ids; + struct list_head list_node; + int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); + void (*detach)(struct acpi_device *dev); + }; + +where ids is the list of IDs of device nodes the given handler is supposed to +take care of, list_node is the hook to the global list of ACPI scan handlers +maintained by the ACPI core and the .attach() and .detach() callbacks are +executed, respectively, after registration of new device nodes and before +unregistration of device nodes the handler attached to previously. + +The namespace scanning function, acpi_bus_scan(), first registers all of the +device nodes in the given namespace scope with the driver core. Then, it tries +to match a scan handler against each of them using the ids arrays of the +available scan handlers. If a matching scan handler is found, its .attach() +callback is executed for the given device node. If that callback returns 1, +that means that the handler has claimed the device node and is now responsible +for carrying out any additional configuration tasks related to it. It also will +be responsible for preparing the device node for unregistration in that case. +The device node's handler field is then populated with the address of the scan +handler that has claimed it. + +If the .attach() callback returns 0, it means that the device node is not +interesting to the given scan handler and may be matched against the next scan +handler in the list. If it returns a (negative) error code, that means that +the namespace scan should be terminated due to a serious error. The error code +returned should then reflect the type of the error. + +The namespace trimming function, acpi_bus_trim(), first executes .detach() +callbacks from the scan handlers of all device nodes in the given namespace +scope (if they have scan handlers). Next, it unregisters all of the device +nodes in that scope. + +ACPI scan handlers can be added to the list maintained by the ACPI core with the +help of the acpi_scan_add_handler() function taking a pointer to the new scan +handler as an argument. The order in which scan handlers are added to the list +is the order in which they are matched against device nodes during namespace +scans. + +All scan handles must be added to the list before acpi_bus_scan() is run for the +first time and they cannot be removed from it. diff --git a/Documentation/driver-api/aperture.rst b/Documentation/driver-api/aperture.rst new file mode 100644 index 000000000..d173f4e7a --- /dev/null +++ b/Documentation/driver-api/aperture.rst @@ -0,0 +1,13 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Managing Ownership of the Framebuffer Aperture +============================================== + +.. kernel-doc:: drivers/video/aperture.c + :doc: overview + +.. kernel-doc:: include/linux/aperture.h + :internal: + +.. kernel-doc:: drivers/video/aperture.c + :export: diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst new file mode 100644 index 000000000..cec84908f --- /dev/null +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -0,0 +1,50 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +.. _auxiliary_bus: + +============= +Auxiliary Bus +============= + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: PURPOSE + +When Should the Auxiliary Bus Be Used +===================================== + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: USAGE + + +Auxiliary Device Creation +========================= + +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_device + +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: auxiliary_device_init __auxiliary_device_add + auxiliary_find_device + +Auxiliary Device Memory Model and Lifespan +------------------------------------------ + +.. kernel-doc:: include/linux/auxiliary_bus.h + :doc: DEVICE_LIFESPAN + + +Auxiliary Drivers +================= + +.. kernel-doc:: include/linux/auxiliary_bus.h + :identifiers: auxiliary_driver module_auxiliary_driver + +.. kernel-doc:: drivers/base/auxiliary.c + :identifiers: __auxiliary_driver_register auxiliary_driver_unregister + +Example Usage +============= + +.. kernel-doc:: drivers/base/auxiliary.c + :doc: EXAMPLE + diff --git a/Documentation/driver-api/backlight/lp855x-driver.rst b/Documentation/driver-api/backlight/lp855x-driver.rst new file mode 100644 index 000000000..1e0b224fc --- /dev/null +++ b/Documentation/driver-api/backlight/lp855x-driver.rst @@ -0,0 +1,81 @@ +==================== +Kernel driver lp855x +==================== + +Backlight driver for LP855x ICs + +Supported chips: + + Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and + LP8557 + +Author: Milo(Woogyom) Kim + +Description +----------- + +* Brightness control + + Brightness can be controlled by the pwm input or the i2c command. + The lp855x driver supports both cases. + +* Device attributes + + 1) bl_ctl_mode + + Backlight control mode. + + Value: pwm based or register based + + 2) chip_id + + The lp855x chip id. + + Value: lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557 + +Platform data for lp855x +------------------------ + +For supporting platform specific data, the lp855x platform data can be used. + +* name: + Backlight driver name. If it is not defined, default name is set. +* device_control: + Value of DEVICE CONTROL register. +* initial_brightness: + Initial value of backlight brightness. +* period_ns: + Platform specific PWM period value. unit is nano. + Only valid when brightness is pwm input mode. +* size_program: + Total size of lp855x_rom_data. +* rom_data: + List of new eeprom/eprom registers. + +Examples +======== + +1) lp8552 platform data: i2c register mode with new eeprom data:: + + #define EEPROM_A5_ADDR 0xA5 + #define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */ + + static struct lp855x_rom_data lp8552_eeprom_arr[] = { + {EEPROM_A5_ADDR, EEPROM_A5_VAL}, + }; + + static struct lp855x_platform_data lp8552_pdata = { + .name = "lcd-bl", + .device_control = I2C_CONFIG(LP8552), + .initial_brightness = INITIAL_BRT, + .size_program = ARRAY_SIZE(lp8552_eeprom_arr), + .rom_data = lp8552_eeprom_arr, + }; + +2) lp8556 platform data: pwm input mode with default rom data:: + + static struct lp855x_platform_data lp8556_pdata = { + .device_control = PWM_CONFIG(LP8556), + .initial_brightness = INITIAL_BRT, + .period_ns = 1000000, + }; diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst new file mode 100644 index 000000000..d78b7c328 --- /dev/null +++ b/Documentation/driver-api/basics.rst @@ -0,0 +1,130 @@ +Driver Basics +============= + +Driver Entry and Exit points +---------------------------- + +.. kernel-doc:: include/linux/module.h + :internal: + +Driver device table +------------------- + +.. kernel-doc:: include/linux/mod_devicetable.h + :internal: + :no-identifiers: pci_device_id + + +Delaying and scheduling routines +-------------------------------- + +.. kernel-doc:: include/linux/sched.h + :internal: + +.. kernel-doc:: kernel/sched/core.c + :export: + +.. kernel-doc:: kernel/sched/cpupri.c + :internal: + +.. kernel-doc:: kernel/sched/fair.c + :internal: + +.. kernel-doc:: include/linux/completion.h + :internal: + +Time and timer routines +----------------------- + +.. kernel-doc:: include/linux/jiffies.h + :internal: + +.. kernel-doc:: kernel/time/time.c + :export: + +.. kernel-doc:: kernel/time/timer.c + :export: + +High-resolution timers +---------------------- + +.. kernel-doc:: include/linux/ktime.h + :internal: + +.. kernel-doc:: include/linux/hrtimer.h + :internal: + +.. kernel-doc:: kernel/time/hrtimer.c + :export: + +Wait queues and Wake events +--------------------------- + +.. kernel-doc:: include/linux/wait.h + :internal: + +.. kernel-doc:: kernel/sched/wait.c + :export: + +Internal Functions +------------------ + +.. kernel-doc:: kernel/exit.c + :internal: + +.. kernel-doc:: kernel/signal.c + :internal: + +.. kernel-doc:: include/linux/kthread.h + :internal: + +.. kernel-doc:: kernel/kthread.c + :export: + +Reference counting +------------------ + +.. kernel-doc:: include/linux/refcount.h + :internal: + +.. kernel-doc:: lib/refcount.c + :export: + +Atomics +------- + +.. kernel-doc:: include/linux/atomic/atomic-instrumented.h + :internal: + +.. kernel-doc:: include/linux/atomic/atomic-arch-fallback.h + :internal: + +.. kernel-doc:: include/linux/atomic/atomic-long.h + :internal: + +Kernel objects manipulation +--------------------------- + +.. kernel-doc:: lib/kobject.c + :export: + +Kernel utility functions +------------------------ + +.. kernel-doc:: include/linux/kernel.h + :internal: + :no-identifiers: kstrtol kstrtoul + +.. kernel-doc:: kernel/printk/printk.c + :export: + :no-identifiers: printk + +.. kernel-doc:: kernel/panic.c + :export: + +Device Resource Management +-------------------------- + +.. kernel-doc:: drivers/base/devres.c + :export: + diff --git a/Documentation/driver-api/clk.rst b/Documentation/driver-api/clk.rst new file mode 100644 index 000000000..93bab5336 --- /dev/null +++ b/Documentation/driver-api/clk.rst @@ -0,0 +1,312 @@ +======================== +The Common Clk Framework +======================== + +:Author: Mike Turquette + +This document endeavours to explain the common clk framework details, +and how to port a platform over to this framework. It is not yet a +detailed explanation of the clock api in include/linux/clk.h, but +perhaps someday it will include that information. + +Introduction and interface split +================================ + +The common clk framework is an interface to control the clock nodes +available on various devices today. This may come in the form of clock +gating, rate adjustment, muxing or other operations. This framework is +enabled with the CONFIG_COMMON_CLK option. + +The interface itself is divided into two halves, each shielded from the +details of its counterpart. First is the common definition of struct +clk which unifies the framework-level accounting and infrastructure that +has traditionally been duplicated across a variety of platforms. Second +is a common implementation of the clk.h api, defined in +drivers/clk/clk.c. Finally there is struct clk_ops, whose operations +are invoked by the clk api implementation. + +The second half of the interface is comprised of the hardware-specific +callbacks registered with struct clk_ops and the corresponding +hardware-specific structures needed to model a particular clock. For +the remainder of this document any reference to a callback in struct +clk_ops, such as .enable or .set_rate, implies the hardware-specific +implementation of that code. Likewise, references to struct clk_foo +serve as a convenient shorthand for the implementation of the +hardware-specific bits for the hypothetical "foo" hardware. + +Tying the two halves of this interface together is struct clk_hw, which +is defined in struct clk_foo and pointed to within struct clk_core. This +allows for easy navigation between the two discrete halves of the common +clock interface. + +Common data structures and api +============================== + +Below is the common struct clk_core definition from +drivers/clk/clk.c, modified for brevity:: + + struct clk_core { + const char *name; + const struct clk_ops *ops; + struct clk_hw *hw; + struct module *owner; + struct clk_core *parent; + const char **parent_names; + struct clk_core **parents; + u8 num_parents; + u8 new_parent_index; + ... + }; + +The members above make up the core of the clk tree topology. The clk +api itself defines several driver-facing functions which operate on +struct clk. That api is documented in include/linux/clk.h. + +Platforms and devices utilizing the common struct clk_core use the struct +clk_ops pointer in struct clk_core to perform the hardware-specific parts of +the operations defined in clk-provider.h:: + + struct clk_ops { + int (*prepare)(struct clk_hw *hw); + void (*unprepare)(struct clk_hw *hw); + int (*is_prepared)(struct clk_hw *hw); + void (*unprepare_unused)(struct clk_hw *hw); + int (*enable)(struct clk_hw *hw); + void (*disable)(struct clk_hw *hw); + int (*is_enabled)(struct clk_hw *hw); + void (*disable_unused)(struct clk_hw *hw); + unsigned long (*recalc_rate)(struct clk_hw *hw, + unsigned long parent_rate); + long (*round_rate)(struct clk_hw *hw, + unsigned long rate, + unsigned long *parent_rate); + int (*determine_rate)(struct clk_hw *hw, + struct clk_rate_request *req); + int (*set_parent)(struct clk_hw *hw, u8 index); + u8 (*get_parent)(struct clk_hw *hw); + int (*set_rate)(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate); + int (*set_rate_and_parent)(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate, + u8 index); + unsigned long (*recalc_accuracy)(struct clk_hw *hw, + unsigned long parent_accuracy); + int (*get_phase)(struct clk_hw *hw); + int (*set_phase)(struct clk_hw *hw, int degrees); + void (*init)(struct clk_hw *hw); + void (*debug_init)(struct clk_hw *hw, + struct dentry *dentry); + }; + +Hardware clk implementations +============================ + +The strength of the common struct clk_core comes from its .ops and .hw pointers +which abstract the details of struct clk from the hardware-specific bits, and +vice versa. To illustrate consider the simple gateable clk implementation in +drivers/clk/clk-gate.c:: + + struct clk_gate { + struct clk_hw hw; + void __iomem *reg; + u8 bit_idx; + ... + }; + +struct clk_gate contains struct clk_hw hw as well as hardware-specific +knowledge about which register and bit controls this clk's gating. +Nothing about clock topology or accounting, such as enable_count or +notifier_count, is needed here. That is all handled by the common +framework code and struct clk_core. + +Let's walk through enabling this clk from driver code:: + + struct clk *clk; + clk = clk_get(NULL, "my_gateable_clk"); + + clk_prepare(clk); + clk_enable(clk); + +The call graph for clk_enable is very simple:: + + clk_enable(clk); + clk->ops->enable(clk->hw); + [resolves to...] + clk_gate_enable(hw); + [resolves struct clk gate with to_clk_gate(hw)] + clk_gate_set_bit(gate); + +And the definition of clk_gate_set_bit:: + + static void clk_gate_set_bit(struct clk_gate *gate) + { + u32 reg; + + reg = __raw_readl(gate->reg); + reg |= BIT(gate->bit_idx); + writel(reg, gate->reg); + } + +Note that to_clk_gate is defined as:: + + #define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) + +This pattern of abstraction is used for every clock hardware +representation. + +Supporting your own clk hardware +================================ + +When implementing support for a new type of clock it is only necessary to +include the following header:: + + #include + +To construct a clk hardware structure for your platform you must define +the following:: + + struct clk_foo { + struct clk_hw hw; + ... hardware specific data goes here ... + }; + +To take advantage of your data you'll need to support valid operations +for your clk:: + + struct clk_ops clk_foo_ops = { + .enable = &clk_foo_enable, + .disable = &clk_foo_disable, + }; + +Implement the above functions using container_of:: + + #define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw) + + int clk_foo_enable(struct clk_hw *hw) + { + struct clk_foo *foo; + + foo = to_clk_foo(hw); + + ... perform magic on foo ... + + return 0; + }; + +Below is a matrix detailing which clk_ops are mandatory based upon the +hardware capabilities of that clock. A cell marked as "y" means +mandatory, a cell marked as "n" implies that either including that +callback is invalid or otherwise unnecessary. Empty cells are either +optional or must be evaluated on a case-by-case basis. + +.. table:: clock hardware characteristics + + +----------------+------+-------------+---------------+-------------+------+ + | | gate | change rate | single parent | multiplexer | root | + +================+======+=============+===============+=============+======+ + |.prepare | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.unprepare | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.enable | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.disable | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.is_enabled | y | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.recalc_rate | | y | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.round_rate | | y [1]_ | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.determine_rate | | y [1]_ | | | | + +----------------+------+-------------+---------------+-------------+------+ + |.set_rate | | y | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.set_parent | | | n | y | n | + +----------------+------+-------------+---------------+-------------+------+ + |.get_parent | | | n | y | n | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.recalc_accuracy| | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +----------------+------+-------------+---------------+-------------+------+ + |.init | | | | | | + +----------------+------+-------------+---------------+-------------+------+ + +.. [1] either one of round_rate or determine_rate is required. + +Finally, register your clock at run-time with a hardware-specific +registration function. This function simply populates struct clk_foo's +data and then passes the common struct clk parameters to the framework +with a call to:: + + clk_register(...) + +See the basic clock types in ``drivers/clk/clk-*.c`` for examples. + +Disabling clock gating of unused clocks +======================================= + +Sometimes during development it can be useful to be able to bypass the +default disabling of unused clocks. For example, if drivers aren't enabling +clocks properly but rely on them being on from the bootloader, bypassing +the disabling means that the driver will remain functional while the issues +are sorted out. + +You can see which clocks have been disabled by booting your kernel with these +parameters:: + + tp_printk trace_event=clk:clk_disable + +To bypass this disabling, include "clk_ignore_unused" in the bootargs to the +kernel. + +Locking +======= + +The common clock framework uses two global locks, the prepare lock and the +enable lock. + +The enable lock is a spinlock and is held across calls to the .enable, +.disable operations. Those operations are thus not allowed to sleep, +and calls to the clk_enable(), clk_disable() API functions are allowed in +atomic context. + +For clk_is_enabled() API, it is also designed to be allowed to be used in +atomic context. However, it doesn't really make any sense to hold the enable +lock in core, unless you want to do something else with the information of +the enable state with that lock held. Otherwise, seeing if a clk is enabled is +a one-shot read of the enabled state, which could just as easily change after +the function returns because the lock is released. Thus the user of this API +needs to handle synchronizing the read of the state with whatever they're +using it for to make sure that the enable state doesn't change during that +time. + +The prepare lock is a mutex and is held across calls to all other operations. +All those operations are allowed to sleep, and calls to the corresponding API +functions are not allowed in atomic context. + +This effectively divides operations in two groups from a locking perspective. + +Drivers don't need to manually protect resources shared between the operations +of one group, regardless of whether those resources are shared by multiple +clocks or not. However, access to resources that are shared between operations +of the two groups needs to be protected by the drivers. An example of such a +resource would be a register that controls both the clock rate and the clock +enable/disable state. + +The clock framework is reentrant, in that a driver is allowed to call clock +framework functions from within its implementation of clock operations. This +can for instance cause a .set_rate operation of one clock being called from +within the .set_rate operation of another clock. This case must be considered +in the driver implementations, but the code flow is usually controlled by the +driver in that case. + +Note that locking must also be considered when code outside of the common +clock framework needs to access resources used by the clock operations. This +is considered out of scope of this document. diff --git a/Documentation/driver-api/component.rst b/Documentation/driver-api/component.rst new file mode 100644 index 000000000..57e375907 --- /dev/null +++ b/Documentation/driver-api/component.rst @@ -0,0 +1,19 @@ +.. _component: + +====================================== +Component Helper for Aggregate Drivers +====================================== + +.. kernel-doc:: drivers/base/component.c + :doc: overview + + +API +=== + +.. kernel-doc:: include/linux/component.h + :internal: + +.. kernel-doc:: drivers/base/component.c + :export: + diff --git a/Documentation/driver-api/connector.rst b/Documentation/driver-api/connector.rst new file mode 100644 index 000000000..631b84a48 --- /dev/null +++ b/Documentation/driver-api/connector.rst @@ -0,0 +1,157 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +Kernel Connector +================ + +Kernel connector - new netlink based userspace <-> kernel space easy +to use communication module. + +The Connector driver makes it easy to connect various agents using a +netlink based network. One must register a callback and an identifier. +When the driver receives a special netlink message with the appropriate +identifier, the appropriate callback will be called. + +From the userspace point of view it's quite straightforward: + + - socket(); + - bind(); + - send(); + - recv(); + +But if kernelspace wants to use the full power of such connections, the +driver writer must create special sockets, must know about struct sk_buff +handling, etc... The Connector driver allows any kernelspace agents to use +netlink based networking for inter-process communication in a significantly +easier way:: + + int cn_add_callback(const struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); + void cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask); + void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask); + + struct cb_id + { + __u32 idx; + __u32 val; + }; + +idx and val are unique identifiers which must be registered in the +connector.h header for in-kernel usage. `void (*callback) (void *)` is a +callback function which will be called when a message with above idx.val +is received by the connector core. The argument for that function must +be dereferenced to `struct cn_msg *`:: + + struct cn_msg + { + struct cb_id id; + + __u32 seq; + __u32 ack; + + __u16 len; /* Length of the following data */ + __u16 flags; + __u8 data[0]; + }; + +Connector interfaces +==================== + + .. kernel-doc:: include/linux/connector.h + + Note: + When registering new callback user, connector core assigns + netlink group to the user which is equal to its id.idx. + +Protocol description +==================== + +The current framework offers a transport layer with fixed headers. The +recommended protocol which uses such a header is as following: + +msg->seq and msg->ack are used to determine message genealogy. When +someone sends a message, they use a locally unique sequence and random +acknowledge number. The sequence number may be copied into +nlmsghdr->nlmsg_seq too. + +The sequence number is incremented with each message sent. + +If you expect a reply to the message, then the sequence number in the +received message MUST be the same as in the original message, and the +acknowledge number MUST be the same + 1. + +If we receive a message and its sequence number is not equal to one we +are expecting, then it is a new message. If we receive a message and +its sequence number is the same as one we are expecting, but its +acknowledge is not equal to the sequence number in the original +message + 1, then it is a new message. + +Obviously, the protocol header contains the above id. + +The connector allows event notification in the following form: kernel +driver or userspace process can ask connector to notify it when +selected ids will be turned on or off (registered or unregistered its +callback). It is done by sending a special command to the connector +driver (it also registers itself with id={-1, -1}). + +As example of this usage can be found in the cn_test.c module which +uses the connector to request notification and to send messages. + +Reliability +=========== + +Netlink itself is not a reliable protocol. That means that messages can +be lost due to memory pressure or process' receiving queue overflowed, +so caller is warned that it must be prepared. That is why the struct +cn_msg [main connector's message header] contains u32 seq and u32 ack +fields. + +Userspace usage +=============== + +2.6.14 has a new netlink socket implementation, which by default does not +allow people to send data to netlink groups other than 1. +So, if you wish to use a netlink socket (for example using connector) +with a different group number, the userspace application must subscribe to +that group first. It can be achieved by the following pseudocode:: + + s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 12345; + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + + { + int on = l_local.nl_groups; + setsockopt(s, 270, 1, &on, sizeof(on)); + } + +Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket +option. To drop a multicast subscription, one should call the above socket +option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0. + +2.6.14 netlink code only allows to select a group which is less or equal to +the maximum group number, which is used at netlink_kernel_create() time. +In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use +group number 12345, you must increment CN_NETLINK_USERS to that number. +Additional 0xf numbers are allocated to be used by non-in-kernel users. + +Due to this limitation, group 0xffffffff does not work now, so one can +not use add/remove connector's group notifications, but as far as I know, +only cn_test.c test module used it. + +Some work in netlink area is still being done, so things can be changed in +2.6.15 timeframe, if it will happen, documentation will be updated for that +kernel. + +Code samples +============ + +Sample code for a connector test module and user space can be found +in samples/connector/. To build this code, enable CONFIG_CONNECTOR +and CONFIG_SAMPLES. diff --git a/Documentation/driver-api/console.rst b/Documentation/driver-api/console.rst new file mode 100644 index 000000000..8394ad774 --- /dev/null +++ b/Documentation/driver-api/console.rst @@ -0,0 +1,152 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Console Drivers +=============== + +The Linux kernel has 2 general types of console drivers. The first type is +assigned by the kernel to all the virtual consoles during the boot process. +This type will be called 'system driver', and only one system driver is allowed +to exist. The system driver is persistent and it can never be unloaded, though +it may become inactive. + +The second type has to be explicitly loaded and unloaded. This will be called +'modular driver' by this document. Multiple modular drivers can coexist at +any time with each driver sharing the console with other drivers including +the system driver. However, modular drivers cannot take over the console +that is currently occupied by another modular driver. (Exception: Drivers that +call do_take_over_console() will succeed in the takeover regardless of the type +of driver occupying the consoles.) They can only take over the console that is +occupied by the system driver. In the same token, if the modular driver is +released by the console, the system driver will take over. + +Modular drivers, from the programmer's point of view, have to call:: + + do_take_over_console() - load and bind driver to console layer + give_up_console() - unload driver; it will only work if driver + is fully unbound + +In newer kernels, the following are also available:: + + do_register_con_driver() + do_unregister_con_driver() + +If sysfs is enabled, the contents of /sys/class/vtconsole can be +examined. This shows the console backends currently registered by the +system which are named vtcon where is an integer from 0 to 15. +Thus:: + + ls /sys/class/vtconsole + . .. vtcon0 vtcon1 + +Each directory in /sys/class/vtconsole has 3 files:: + + ls /sys/class/vtconsole/vtcon0 + . .. bind name uevent + +What do these files signify? + + 1. bind - this is a read/write file. It shows the status of the driver if + read, or acts to bind or unbind the driver to the virtual consoles + when written to. The possible values are: + + 0 + - means the driver is not bound and if echo'ed, commands the driver + to unbind + + 1 + - means the driver is bound and if echo'ed, commands the driver to + bind + + 2. name - read-only file. Shows the name of the driver in this format:: + + cat /sys/class/vtconsole/vtcon0/name + (S) VGA+ + + '(S)' stands for a (S)ystem driver, i.e., it cannot be directly + commanded to bind or unbind + + 'VGA+' is the name of the driver + + cat /sys/class/vtconsole/vtcon1/name + (M) frame buffer device + + In this case, '(M)' stands for a (M)odular driver, one that can be + directly commanded to bind or unbind. + + 3. uevent - ignore this file + +When unbinding, the modular driver is detached first, and then the system +driver takes over the consoles vacated by the driver. Binding, on the other +hand, will bind the driver to the consoles that are currently occupied by a +system driver. + +NOTE1: + Binding and unbinding must be selected in Kconfig. It's under:: + + Device Drivers -> + Character devices -> + Support for binding and unbinding console drivers + +NOTE2: + If any of the virtual consoles are in KD_GRAPHICS mode, then binding or + unbinding will not succeed. An example of an application that sets the + console to KD_GRAPHICS is X. + +How useful is this feature? This is very useful for console driver +developers. By unbinding the driver from the console layer, one can unload the +driver, make changes, recompile, reload and rebind the driver without any need +for rebooting the kernel. For regular users who may want to switch from +framebuffer console to VGA console and vice versa, this feature also makes +this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb +for more details.) + +Notes for developers +==================== + +do_take_over_console() is now broken up into:: + + do_register_con_driver() + do_bind_con_driver() - private function + +give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must +be fully unbound for this call to succeed. con_is_bound() will check if the +driver is bound or not. + +Guidelines for console driver writers +===================================== + +In order for binding to and unbinding from the console to properly work, +console drivers must follow these guidelines: + +1. All drivers, except system drivers, must call either do_register_con_driver() + or do_take_over_console(). do_register_con_driver() will just add the driver + to the console's internal list. It won't take over the + console. do_take_over_console(), as it name implies, will also take over (or + bind to) the console. + +2. All resources allocated during con->con_init() must be released in + con->con_deinit(). + +3. All resources allocated in con->con_startup() must be released when the + driver, which was previously bound, becomes unbound. The console layer + does not have a complementary call to con->con_startup() so it's up to the + driver to check when it's legal to release these resources. Calling + con_is_bound() in con->con_deinit() will help. If the call returned + false(), then it's safe to release the resources. This balance has to be + ensured because con->con_startup() can be called again when a request to + rebind the driver to the console arrives. + +4. Upon exit of the driver, ensure that the driver is totally unbound. If the + condition is satisfied, then the driver must call do_unregister_con_driver() + or give_up_console(). + +5. do_unregister_con_driver() can also be called on conditions which make it + impossible for the driver to service console requests. This can happen + with the framebuffer console that suddenly lost all of its drivers. + +The current crop of console drivers should still work correctly, but binding +and unbinding them may cause problems. With minimal fixes, these drivers can +be made to work correctly. + +Antonino Daplas diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst new file mode 100644 index 000000000..036e49553 --- /dev/null +++ b/Documentation/driver-api/cxl/index.rst @@ -0,0 +1,12 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +Compute Express Link +==================== + +.. toctree:: + :maxdepth: 1 + + memory-devices + +.. only:: subproject and html diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst new file mode 100644 index 000000000..5149ecdc5 --- /dev/null +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -0,0 +1,383 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=================================== +Compute Express Link Memory Devices +=================================== + +A Compute Express Link Memory Device is a CXL component that implements the +CXL.mem protocol. It contains some amount of volatile memory, persistent memory, +or both. It is enumerated as a PCI device for configuration and passing +messages over an MMIO mailbox. Its contribution to the System Physical +Address space is handled via HDM (Host Managed Device Memory) decoders +that optionally define a device's contribution to an interleaved address +range across multiple devices underneath a host-bridge or interleaved +across host-bridges. + +CXL Bus: Theory of Operation +============================ +Similar to how a RAID driver takes disk objects and assembles them into a new +logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and +assemble them into a CXL.mem decode topology. The need for runtime configuration +of the CXL.mem topology is also similar to RAID in that different environments +with the same hardware configuration may decide to assemble the topology in +contrasting ways. One may choose performance (RAID0) striping memory across +multiple Host Bridges and endpoints while another may opt for fault tolerance +and disable any striping in the CXL.mem topology. + +Platform firmware enumerates a menu of interleave options at the "CXL root port" +(Linux term for the top of the CXL decode topology). From there, PCIe topology +dictates which endpoints can participate in which Host Bridge decode regimes. +Each PCIe Switch in the path between the root and an endpoint introduces a point +at which the interleave can be split. For example platform firmware may say at a +given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +interleave cycles across multiple Root Ports. An intervening Switch between a +port and an endpoint may interleave cycles across multiple Downstream Switch +Ports, etc. + +Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test' +module generates an emulated CXL topology of 2 Host Bridges each with 2 Root +Ports. Each of those Root Ports are connected to 2-way switches with endpoints +connected to those downstream ports for a total of 8 endpoints:: + + # cxl list -BEMPu -b cxl_test + { + "bus":"root3", + "provider":"cxl_test", + "ports:root3":[ + { + "port":"port5", + "host":"cxl_host_bridge.1", + "ports:port5":[ + { + "port":"port8", + "host":"cxl_switch_uport.1", + "endpoints:port8":[ + { + "endpoint":"endpoint9", + "host":"mem2", + "memdev":{ + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x1", + "numa_node":1, + "host":"cxl_mem.1" + } + }, + { + "endpoint":"endpoint15", + "host":"mem6", + "memdev":{ + "memdev":"mem6", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x5", + "numa_node":1, + "host":"cxl_mem.5" + } + } + ] + }, + { + "port":"port12", + "host":"cxl_switch_uport.3", + "endpoints:port12":[ + { + "endpoint":"endpoint17", + "host":"mem8", + "memdev":{ + "memdev":"mem8", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x7", + "numa_node":1, + "host":"cxl_mem.7" + } + }, + { + "endpoint":"endpoint13", + "host":"mem4", + "memdev":{ + "memdev":"mem4", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x3", + "numa_node":1, + "host":"cxl_mem.3" + } + } + ] + } + ] + }, + { + "port":"port4", + "host":"cxl_host_bridge.0", + "ports:port4":[ + { + "port":"port6", + "host":"cxl_switch_uport.0", + "endpoints:port6":[ + { + "endpoint":"endpoint7", + "host":"mem1", + "memdev":{ + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + } + }, + { + "endpoint":"endpoint14", + "host":"mem5", + "memdev":{ + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + } + } + ] + }, + { + "port":"port10", + "host":"cxl_switch_uport.2", + "endpoints:port10":[ + { + "endpoint":"endpoint16", + "host":"mem7", + "memdev":{ + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + } + }, + { + "endpoint":"endpoint11", + "host":"mem3", + "memdev":{ + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + } + ] + } + ] + } + ] + } + +In that listing each "root", "port", and "endpoint" object correspond a kernel +'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to +its descendants. So "root" claims non-PCIe enumerable platform decode ranges and +decodes them to "ports", "ports" decode to "endpoints", and "endpoints" +represent the decode from SPA (System Physical Address) to DPA (Device Physical +Address). + +Continuing the RAID analogy, disks have both topology metadata and on device +metadata that determine RAID set assembly. CXL Port topology and CXL Port link +status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated +by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches +the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port +objects. Conversely for hot-unplug / removal scenarios, there is no need for +the Linux PCI core to tear down switch-level CXL resources because the endpoint +->remove() event cleans up the port data that was established to support that +Memory Expander. + +The port metadata and potential decode schemes that a give memory device may +participate can be determined via a command like:: + + # cxl list -BDMu -d root -m mem3 + { + "bus":"root3", + "provider":"cxl_test", + "decoders:root3":[ + { + "decoder":"decoder3.1", + "resource":"0x8030000000", + "size":"512.00 MiB (536.87 MB)", + "volatile_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.3", + "resource":"0x8060000000", + "size":"512.00 MiB (536.87 MB)", + "pmem_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.0", + "resource":"0x8020000000", + "size":"256.00 MiB (268.44 MB)", + "volatile_capable":true, + "nr_targets":1 + }, + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ], + "memdevs:root3":[ + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + } + +...which queries the CXL topology to ask "given CXL Memory Expander with a kernel +device name of 'mem3' which platform level decode ranges may this device +participate". A given expander can participate in multiple CXL.mem interleave +sets simultaneously depending on how many decoder resource it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans to +Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile +memory interleave that spans 2 Host Bridges, and a Volatile memory interleave +that only targets a single Host Bridge. + +Conversely the memory devices that can participate in a given platform level +decode scheme can be determined via a command like the following:: + + # cxl list -MDu -d 3.2 + [ + { + "memdevs":[ + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + }, + { + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + }, + { + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ] + } + ] + +...where the naming scheme for decoders is "decoder.". + +Driver Infrastructure +===================== + +This section covers the driver infrastructure for a CXL memory device. + +CXL Memory Device +----------------- + +.. kernel-doc:: drivers/cxl/pci.c + :doc: cxl pci + +.. kernel-doc:: drivers/cxl/pci.c + :internal: + +.. kernel-doc:: drivers/cxl/mem.c + :doc: cxl mem + +CXL Port +-------- +.. kernel-doc:: drivers/cxl/port.c + :doc: cxl port + +CXL Core +-------- +.. kernel-doc:: drivers/cxl/cxl.h + :doc: cxl objects + +.. kernel-doc:: drivers/cxl/cxl.h + :internal: + +.. kernel-doc:: drivers/cxl/core/port.c + :doc: cxl core + +.. kernel-doc:: drivers/cxl/core/port.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pci.c + :doc: cxl core pci + +.. kernel-doc:: drivers/cxl/core/pci.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pmem.c + :doc: cxl pmem + +.. kernel-doc:: drivers/cxl/core/regs.c + :doc: cxl registers + +.. kernel-doc:: drivers/cxl/core/mbox.c + :doc: cxl mbox + +CXL Regions +----------- +.. kernel-doc:: drivers/cxl/core/region.c + :doc: cxl core region + +.. kernel-doc:: drivers/cxl/core/region.c + :identifiers: + +External Interfaces +=================== + +CXL IOCTL Interface +------------------- + +.. kernel-doc:: include/uapi/linux/cxl_mem.h + :doc: UAPI + +.. kernel-doc:: include/uapi/linux/cxl_mem.h + :internal: diff --git a/Documentation/driver-api/dcdbas.rst b/Documentation/driver-api/dcdbas.rst new file mode 100644 index 000000000..309cc57a7 --- /dev/null +++ b/Documentation/driver-api/dcdbas.rst @@ -0,0 +1,99 @@ +=================================== +Dell Systems Management Base Driver +=================================== + +Overview +======== + +The Dell Systems Management Base Driver provides a sysfs interface for +systems management software such as Dell OpenManage to perform system +management interrupts and host control actions (system power cycle or +power off after OS shutdown) on certain Dell systems. + +Dell OpenManage requires this driver on the following Dell PowerEdge systems: +300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC, +700, and 750. Other Dell software such as the open source libsmbios project +is expected to make use of this driver, and it may include the use of this +driver on other Dell systems. + +The Dell libsmbios project aims towards providing access to as much BIOS +information as possible. See http://linux.dell.com/libsmbios/main/ for +more information about the libsmbios project. + + +System Management Interrupt +=========================== + +On some Dell systems, systems management software must access certain +management information via a system management interrupt (SMI). The SMI data +buffer must reside in 32-bit address space, and the physical address of the +buffer is required for the SMI. The driver maintains the memory required for +the SMI and provides a way for the application to generate the SMI. +The driver creates the following sysfs entries for systems management +software to perform these system management interrupts:: + + /sys/devices/platform/dcdbas/smi_data + /sys/devices/platform/dcdbas/smi_data_buf_phys_addr + /sys/devices/platform/dcdbas/smi_data_buf_size + /sys/devices/platform/dcdbas/smi_request + +Systems management software must perform the following steps to execute +a SMI using this driver: + +1) Lock smi_data. +2) Write system management command to smi_data. +3) Write "1" to smi_request to generate a calling interface SMI or + "2" to generate a raw SMI. +4) Read system management command response from smi_data. +5) Unlock smi_data. + + +Host Control Action +=================== + +Dell OpenManage supports a host control feature that allows the administrator +to perform a power cycle or power off of the system after the OS has finished +shutting down. On some Dell systems, this host control feature requires that +a driver perform a SMI after the OS has finished shutting down. + +The driver creates the following sysfs entries for systems management software +to schedule the driver to perform a power cycle or power off host control +action after the system has finished shutting down: + +/sys/devices/platform/dcdbas/host_control_action +/sys/devices/platform/dcdbas/host_control_smi_type +/sys/devices/platform/dcdbas/host_control_on_shutdown + +Dell OpenManage performs the following steps to execute a power cycle or +power off host control action using this driver: + +1) Write host control action to be performed to host_control_action. +2) Write type of SMI that driver needs to perform to host_control_smi_type. +3) Write "1" to host_control_on_shutdown to enable host control action. +4) Initiate OS shutdown. + (Driver will perform host control SMI when it is notified that the OS + has finished shutting down.) + + +Host Control SMI Type +===================== + +The following table shows the value to write to host_control_smi_type to +perform a power cycle or power off host control action: + +=================== ===================== +PowerEdge System Host Control SMI Type +=================== ===================== + 300 HC_SMITYPE_TYPE1 + 1300 HC_SMITYPE_TYPE1 + 1400 HC_SMITYPE_TYPE2 + 500SC HC_SMITYPE_TYPE2 + 1500SC HC_SMITYPE_TYPE2 + 1550 HC_SMITYPE_TYPE2 + 600SC HC_SMITYPE_TYPE2 + 1600SC HC_SMITYPE_TYPE2 + 650 HC_SMITYPE_TYPE2 + 1655MC HC_SMITYPE_TYPE2 + 700 HC_SMITYPE_TYPE3 + 750 HC_SMITYPE_TYPE3 +=================== ===================== diff --git a/Documentation/driver-api/devfreq.rst b/Documentation/driver-api/devfreq.rst new file mode 100644 index 000000000..4a0bf87a3 --- /dev/null +++ b/Documentation/driver-api/devfreq.rst @@ -0,0 +1,30 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +Device Frequency Scaling +======================== + +Introduction +------------ + +This framework provides a standard kernel interface for Dynamic Voltage and +Frequency Switching on arbitrary devices. + +It exposes controls for adjusting frequency through sysfs files which are +similar to the cpufreq subsystem. + +Devices for which current usage can be measured can have their frequency +automatically adjusted by governors. + +API +--- + +Device drivers need to initialize a :c:type:`devfreq_profile` and call the +:c:func:`devfreq_add_device` function to create a :c:type:`devfreq` instance. + +.. kernel-doc:: include/linux/devfreq.h +.. kernel-doc:: include/linux/devfreq-event.h +.. kernel-doc:: drivers/devfreq/devfreq.c + :export: +.. kernel-doc:: drivers/devfreq/devfreq-event.c + :export: diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst new file mode 100644 index 000000000..2c7abd234 --- /dev/null +++ b/Documentation/driver-api/device-io.rst @@ -0,0 +1,521 @@ +.. Copyright 2001 Matthew Wilcox +.. +.. This documentation is free software; you can redistribute +.. it and/or modify it under the terms of the GNU General Public +.. License as published by the Free Software Foundation; either +.. version 2 of the License, or (at your option) any later +.. version. + +=============================== +Bus-Independent Device Accesses +=============================== + +:Author: Matthew Wilcox +:Author: Alan Cox + +Introduction +============ + +Linux provides an API which abstracts performing IO across all busses +and devices, allowing device drivers to be written independently of bus +type. + +Memory Mapped IO +================ + +Getting Access to the Device +---------------------------- + +The most widely supported form of IO is memory mapped IO. That is, a +part of the CPU's address space is interpreted not as accesses to +memory, but as accesses to a device. Some architectures define devices +to be at a fixed address, but most have some method of discovering +devices. The PCI bus walk is a good example of such a scheme. This +document does not cover how to receive such an address, but assumes you +are starting with one. Physical addresses are of type unsigned long. + +This address should not be used directly. Instead, to get an address +suitable for passing to the accessor functions described below, you +should call ioremap(). An address suitable for accessing +the device will be returned to you. + +After you've finished using the device (say, in your module's exit +routine), call iounmap() in order to return the address +space to the kernel. Most architectures allocate new address space each +time you call ioremap(), and they can run out unless you +call iounmap(). + +Accessing the device +-------------------- + +The part of the interface most used by drivers is reading and writing +memory-mapped registers on the device. Linux provides interfaces to read +and write 8-bit, 16-bit, 32-bit and 64-bit quantities. Due to a +historical accident, these are named byte, word, long and quad accesses. +Both read and write accesses are supported; there is no prefetch support +at this time. + +The functions are named readb(), readw(), readl(), readq(), +readb_relaxed(), readw_relaxed(), readl_relaxed(), readq_relaxed(), +writeb(), writew(), writel() and writeq(). + +Some devices (such as framebuffers) would like to use larger transfers than +8 bytes at a time. For these devices, the memcpy_toio(), +memcpy_fromio() and memset_io() functions are +provided. Do not use memset or memcpy on IO addresses; they are not +guaranteed to copy data in order. + +The read and write functions are defined to be ordered. That is the +compiler is not permitted to reorder the I/O sequence. When the ordering +can be compiler optimised, you can use __readb() and friends to +indicate the relaxed ordering. Use this with care. + +While the basic functions are defined to be synchronous with respect to +each other and ordered with respect to each other the busses the devices +sit on may themselves have asynchronicity. In particular many authors +are burned by the fact that PCI bus writes are posted asynchronously. A +driver author must issue a read from the same device to ensure that +writes have occurred in the specific cases the author cares. This kind +of property cannot be hidden from driver writers in the API. In some +cases, the read used to flush the device may be expected to fail (if the +card is resetting, for example). In that case, the read should be done +from config space, which is guaranteed to soft-fail if the card doesn't +respond. + +The following is an example of flushing a write to a device when the +driver would like to ensure the write's effects are visible prior to +continuing execution:: + + static inline void + qla1280_disable_intrs(struct scsi_qla_host *ha) + { + struct device_reg *reg; + + reg = ha->iobase; + /* disable risc and host interrupts */ + WRT_REG_WORD(®->ictrl, 0); + /* + * The following read will ensure that the above write + * has been received by the device before we return from this + * function. + */ + RD_REG_WORD(®->ictrl); + ha->flags.ints_enabled = 0; + } + +PCI ordering rules also guarantee that PIO read responses arrive after any +outstanding DMA writes from that bus, since for some devices the result of +a readb() call may signal to the driver that a DMA transaction is +complete. In many cases, however, the driver may want to indicate that the +next readb() call has no relation to any previous DMA writes +performed by the device. The driver can use readb_relaxed() for +these cases, although only some platforms will honor the relaxed +semantics. Using the relaxed read functions will provide significant +performance benefits on platforms that support it. The qla2xxx driver +provides examples of how to use readX_relaxed(). In many cases, a majority +of the driver's readX() calls can safely be converted to readX_relaxed() +calls, since only a few will indicate or depend on DMA completion. + +Port Space Accesses +=================== + +Port Space Explained +-------------------- + +Another form of IO commonly supported is Port Space. This is a range of +addresses separate to the normal memory address space. Access to these +addresses is generally not as fast as accesses to the memory mapped +addresses, and it also has a potentially smaller address space. + +Unlike memory mapped IO, no preparation is required to access port +space. + +Accessing Port Space +-------------------- + +Accesses to this space are provided through a set of functions which +allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and +long. These functions are inb(), inw(), +inl(), outb(), outw() and +outl(). + +Some variants are provided for these functions. Some devices require +that accesses to their ports are slowed down. This functionality is +provided by appending a ``_p`` to the end of the function. +There are also equivalents to memcpy. The ins() and +outs() functions copy bytes, words or longs to the given +port. + +__iomem pointer tokens +====================== + +The data type for an MMIO address is an ``__iomem`` qualified pointer, such as +``void __iomem *reg``. On most architectures it is a regular pointer that +points to a virtual memory address and can be offset or dereferenced, but in +portable code, it must only be passed from and to functions that explicitly +operated on an ``__iomem`` token, in particular the ioremap() and +readl()/writel() functions. The 'sparse' semantic code checker can be used to +verify that this is done correctly. + +While on most architectures, ioremap() creates a page table entry for an +uncached virtual address pointing to the physical MMIO address, some +architectures require special instructions for MMIO, and the ``__iomem`` pointer +just encodes the physical address or an offsettable cookie that is interpreted +by readl()/writel(). + +Differences between I/O access functions +======================================== + +readq(), readl(), readw(), readb(), writeq(), writel(), writew(), writeb() + + These are the most generic accessors, providing serialization against other + MMIO accesses and DMA accesses as well as fixed endianness for accessing + little-endian PCI devices and on-chip peripherals. Portable device drivers + should generally use these for any access to ``__iomem`` pointers. + + Note that posted writes are not strictly ordered against a spinlock, see + Documentation/driver-api/io_ordering.rst. + +readq_relaxed(), readl_relaxed(), readw_relaxed(), readb_relaxed(), +writeq_relaxed(), writel_relaxed(), writew_relaxed(), writeb_relaxed() + + On architectures that require an expensive barrier for serializing against + DMA, these "relaxed" versions of the MMIO accessors only serialize against + each other, but contain a less expensive barrier operation. A device driver + might use these in a particularly performance sensitive fast path, with a + comment that explains why the usage in a specific location is safe without + the extra barriers. + + See memory-barriers.txt for a more detailed discussion on the precise ordering + guarantees of the non-relaxed and relaxed versions. + +ioread64(), ioread32(), ioread16(), ioread8(), +iowrite64(), iowrite32(), iowrite16(), iowrite8() + + These are an alternative to the normal readl()/writel() functions, with almost + identical behavior, but they can also operate on ``__iomem`` tokens returned + for mapping PCI I/O space with pci_iomap() or ioport_map(). On architectures + that require special instructions for I/O port access, this adds a small + overhead for an indirect function call implemented in lib/iomap.c, while on + other architectures, these are simply aliases. + +ioread64be(), ioread32be(), ioread16be() +iowrite64be(), iowrite32be(), iowrite16be() + + These behave in the same way as the ioread32()/iowrite32() family, but with + reversed byte order, for accessing devices with big-endian MMIO registers. + Device drivers that can operate on either big-endian or little-endian + registers may have to implement a custom wrapper function that picks one or + the other depending on which device was found. + + Note: On some architectures, the normal readl()/writel() functions + traditionally assume that devices are the same endianness as the CPU, while + using a hardware byte-reverse on the PCI bus when running a big-endian kernel. + Drivers that use readl()/writel() this way are generally not portable, but + tend to be limited to a particular SoC. + +hi_lo_readq(), lo_hi_readq(), hi_lo_readq_relaxed(), lo_hi_readq_relaxed(), +ioread64_lo_hi(), ioread64_hi_lo(), ioread64be_lo_hi(), ioread64be_hi_lo(), +hi_lo_writeq(), lo_hi_writeq(), hi_lo_writeq_relaxed(), lo_hi_writeq_relaxed(), +iowrite64_lo_hi(), iowrite64_hi_lo(), iowrite64be_lo_hi(), iowrite64be_hi_lo() + + Some device drivers have 64-bit registers that cannot be accessed atomically + on 32-bit architectures but allow two consecutive 32-bit accesses instead. + Since it depends on the particular device which of the two halves has to be + accessed first, a helper is provided for each combination of 64-bit accessors + with either low/high or high/low word ordering. A device driver must include + either or to + get the function definitions along with helpers that redirect the normal + readq()/writeq() to them on architectures that do not provide 64-bit access + natively. + +__raw_readq(), __raw_readl(), __raw_readw(), __raw_readb(), +__raw_writeq(), __raw_writel(), __raw_writew(), __raw_writeb() + + These are low-level MMIO accessors without barriers or byteorder changes and + architecture specific behavior. Accesses are usually atomic in the sense that + a four-byte __raw_readl() does not get split into individual byte loads, but + multiple consecutive accesses can be combined on the bus. In portable code, it + is only safe to use these to access memory behind a device bus but not MMIO + registers, as there are no ordering guarantees with regard to other MMIO + accesses or even spinlocks. The byte order is generally the same as for normal + memory, so unlike the other functions, these can be used to copy data between + kernel memory and device memory. + +inl(), inw(), inb(), outl(), outw(), outb() + + PCI I/O port resources traditionally require separate helpers as they are + implemented using special instructions on the x86 architecture. On most other + architectures, these are mapped to readl()/writel() style accessors + internally, usually pointing to a fixed area in virtual memory. Instead of an + ``__iomem`` pointer, the address is a 32-bit integer token to identify a port + number. PCI requires I/O port access to be non-posted, meaning that an outb() + must complete before the following code executes, while a normal writeb() may + still be in progress. On architectures that correctly implement this, I/O port + access is therefore ordered against spinlocks. Many non-x86 PCI host bridge + implementations and CPU architectures however fail to implement non-posted I/O + space on PCI, so they can end up being posted on such hardware. + + In some architectures, the I/O port number space has a 1:1 mapping to + ``__iomem`` pointers, but this is not recommended and device drivers should + not rely on that for portability. Similarly, an I/O port number as described + in a PCI base address register may not correspond to the port number as seen + by a device driver. Portable drivers need to read the port number for the + resource provided by the kernel. + + There are no direct 64-bit I/O port accessors, but pci_iomap() in combination + with ioread64/iowrite64 can be used instead. + +inl_p(), inw_p(), inb_p(), outl_p(), outw_p(), outb_p() + + On ISA devices that require specific timing, the _p versions of the I/O + accessors add a small delay. On architectures that do not have ISA buses, + these are aliases to the normal inb/outb helpers. + +readsq, readsl, readsw, readsb +writesq, writesl, writesw, writesb +ioread64_rep, ioread32_rep, ioread16_rep, ioread8_rep +iowrite64_rep, iowrite32_rep, iowrite16_rep, iowrite8_rep +insl, insw, insb, outsl, outsw, outsb + + These are helpers that access the same address multiple times, usually to copy + data between kernel memory byte stream and a FIFO buffer. Unlike the normal + MMIO accessors, these do not perform a byteswap on big-endian kernels, so the + first byte in the FIFO register corresponds to the first byte in the memory + buffer regardless of the architecture. + +Device memory mapping modes +=========================== + +Some architectures support multiple modes for mapping device memory. +ioremap_*() variants provide a common abstraction around these +architecture-specific modes, with a shared set of semantics. + +ioremap() is the most common mapping type, and is applicable to typical device +memory (e.g. I/O registers). Other modes can offer weaker or stronger +guarantees, if supported by the architecture. From most to least common, they +are as follows: + +ioremap() +--------- + +The default mode, suitable for most memory-mapped devices, e.g. control +registers. Memory mapped using ioremap() has the following characteristics: + +* Uncached - CPU-side caches are bypassed, and all reads and writes are handled + directly by the device +* No speculative operations - the CPU may not issue a read or write to this + memory, unless the instruction that does so has been reached in committed + program flow. +* No reordering - The CPU may not reorder accesses to this memory mapping with + respect to each other. On some architectures, this relies on barriers in + readl_relaxed()/writel_relaxed(). +* No repetition - The CPU may not issue multiple reads or writes for a single + program instruction. +* No write-combining - Each I/O operation results in one discrete read or write + being issued to the device, and multiple writes are not combined into larger + writes. This may or may not be enforced when using __raw I/O accessors or + pointer dereferences. +* Non-executable - The CPU is not allowed to speculate instruction execution + from this memory (it probably goes without saying, but you're also not + allowed to jump into device memory). + +On many platforms and buses (e.g. PCI), writes issued through ioremap() +mappings are posted, which means that the CPU does not wait for the write to +actually reach the target device before retiring the write instruction. + +On many platforms, I/O accesses must be aligned with respect to the access +size; failure to do so will result in an exception or unpredictable results. + +ioremap_wc() +------------ + +Maps I/O memory as normal memory with write combining. Unlike ioremap(), + +* The CPU may speculatively issue reads from the device that the program + didn't actually execute, and may choose to basically read whatever it wants. +* The CPU may reorder operations as long as the result is consistent from the + program's point of view. +* The CPU may write to the same location multiple times, even when the program + issued a single write. +* The CPU may combine several writes into a single larger write. + +This mode is typically used for video framebuffers, where it can increase +performance of writes. It can also be used for other blocks of memory in +devices (e.g. buffers or shared memory), but care must be taken as accesses are +not guaranteed to be ordered with respect to normal ioremap() MMIO register +accesses without explicit barriers. + +On a PCI bus, it is usually safe to use ioremap_wc() on MMIO areas marked as +``IORESOURCE_PREFETCH``, but it may not be used on those without the flag. +For on-chip devices, there is no corresponding flag, but a driver can use +ioremap_wc() on a device that is known to be safe. + +ioremap_wt() +------------ + +Maps I/O memory as normal memory with write-through caching. Like ioremap_wc(), +but also, + +* The CPU may cache writes issued to and reads from the device, and serve reads + from that cache. + +This mode is sometimes used for video framebuffers, where drivers still expect +writes to reach the device in a timely manner (and not be stuck in the CPU +cache), but reads may be served from the cache for efficiency. However, it is +rarely useful these days, as framebuffer drivers usually perform writes only, +for which ioremap_wc() is more efficient (as it doesn't needlessly trash the +cache). Most drivers should not use this. + +ioremap_np() +------------ + +Like ioremap(), but explicitly requests non-posted write semantics. On some +architectures and buses, ioremap() mappings have posted write semantics, which +means that writes can appear to "complete" from the point of view of the +CPU before the written data actually arrives at the target device. Writes are +still ordered with respect to other writes and reads from the same device, but +due to the posted write semantics, this is not the case with respect to other +devices. ioremap_np() explicitly requests non-posted semantics, which means +that the write instruction will not appear to complete until the device has +received (and to some platform-specific extent acknowledged) the written data. + +This mapping mode primarily exists to cater for platforms with bus fabrics that +require this particular mapping mode to work correctly. These platforms set the +``IORESOURCE_MEM_NONPOSTED`` flag for a resource that requires ioremap_np() +semantics and portable drivers should use an abstraction that automatically +selects it where appropriate (see the `Higher-level ioremap abstractions`_ +section below). + +The bare ioremap_np() is only available on some architectures; on others, it +always returns NULL. Drivers should not normally use it, unless they are +platform-specific or they derive benefit from non-posted writes where +supported, and can fall back to ioremap() otherwise. The normal approach to +ensure posted write completion is to do a dummy read after a write as +explained in `Accessing the device`_, which works with ioremap() on all +platforms. + +ioremap_np() should never be used for PCI drivers. PCI memory space writes are +always posted, even on architectures that otherwise implement ioremap_np(). +Using ioremap_np() for PCI BARs will at best result in posted write semantics, +and at worst result in complete breakage. + +Note that non-posted write semantics are orthogonal to CPU-side ordering +guarantees. A CPU may still choose to issue other reads or writes before a +non-posted write instruction retires. See the previous section on MMIO access +functions for details on the CPU side of things. + +ioremap_uc() +------------ + +ioremap_uc() behaves like ioremap() except that on the x86 architecture without +'PAT' mode, it marks memory as uncached even when the MTRR has designated +it as cacheable, see Documentation/arch/x86/pat.rst. + +Portable drivers should avoid the use of ioremap_uc(). + +ioremap_cache() +--------------- + +ioremap_cache() effectively maps I/O memory as normal RAM. CPU write-back +caches can be used, and the CPU is free to treat the device as if it were a +block of RAM. This should never be used for device memory which has side +effects of any kind, or which does not return the data previously written on +read. + +It should also not be used for actual RAM, as the returned pointer is an +``__iomem`` token. memremap() can be used for mapping normal RAM that is outside +of the linear kernel memory area to a regular pointer. + +Portable drivers should avoid the use of ioremap_cache(). + +Architecture example +-------------------- + +Here is how the above modes map to memory attribute settings on the ARM64 +architecture: + ++------------------------+--------------------------------------------+ +| API | Memory region type and cacheability | ++------------------------+--------------------------------------------+ +| ioremap_np() | Device-nGnRnE | ++------------------------+--------------------------------------------+ +| ioremap() | Device-nGnRE | ++------------------------+--------------------------------------------+ +| ioremap_uc() | (not implemented) | ++------------------------+--------------------------------------------+ +| ioremap_wc() | Normal-Non Cacheable | ++------------------------+--------------------------------------------+ +| ioremap_wt() | (not implemented; fallback to ioremap) | ++------------------------+--------------------------------------------+ +| ioremap_cache() | Normal-Write-Back Cacheable | ++------------------------+--------------------------------------------+ + +Higher-level ioremap abstractions +================================= + +Instead of using the above raw ioremap() modes, drivers are encouraged to use +higher-level APIs. These APIs may implement platform-specific logic to +automatically choose an appropriate ioremap mode on any given bus, allowing for +a platform-agnostic driver to work on those platforms without any special +cases. At the time of this writing, the following ioremap() wrappers have such +logic: + +devm_ioremap_resource() + + Can automatically select ioremap_np() over ioremap() according to platform + requirements, if the ``IORESOURCE_MEM_NONPOSTED`` flag is set on the struct + resource. Uses devres to automatically unmap the resource when the driver + probe() function fails or a device in unbound from its driver. + + Documented in Documentation/driver-api/driver-model/devres.rst. + +of_address_to_resource() + + Automatically sets the ``IORESOURCE_MEM_NONPOSTED`` flag for platforms that + require non-posted writes for certain buses (see the nonposted-mmio and + posted-mmio device tree properties). + +of_iomap() + + Maps the resource described in a ``reg`` property in the device tree, doing + all required translations. Automatically selects ioremap_np() according to + platform requirements, as above. + +pci_ioremap_bar(), pci_ioremap_wc_bar() + + Maps the resource described in a PCI base address without having to extract + the physical address first. + +pci_iomap(), pci_iomap_wc() + + Like pci_ioremap_bar()/pci_ioremap_bar(), but also works on I/O space when + used together with ioread32()/iowrite32() and similar accessors + +pcim_iomap() + + Like pci_iomap(), but uses devres to automatically unmap the resource when + the driver probe() function fails or a device in unbound from its driver + + Documented in Documentation/driver-api/driver-model/devres.rst. + +Not using these wrappers may make drivers unusable on certain platforms with +stricter rules for mapping I/O memory. + +Generalizing Access to System and I/O Memory +============================================ + +.. kernel-doc:: include/linux/iosys-map.h + :doc: overview + +.. kernel-doc:: include/linux/iosys-map.h + :internal: + +Public Functions Provided +========================= + +.. kernel-doc:: arch/x86/include/asm/io.h + :internal: + +.. kernel-doc:: lib/pci_iomap.c + :export: diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst new file mode 100644 index 000000000..ee913ae16 --- /dev/null +++ b/Documentation/driver-api/device_link.rst @@ -0,0 +1,320 @@ +.. _device_link: + +============ +Device links +============ + +By default, the driver core only enforces dependencies between devices +that are borne out of a parent/child relationship within the device +hierarchy: When suspending, resuming or shutting down the system, devices +are ordered based on this relationship, i.e. children are always suspended +before their parent, and the parent is always resumed before its children. + +Sometimes there is a need to represent device dependencies beyond the +mere parent/child relationship, e.g. between siblings, and have the +driver core automatically take care of them. + +Secondly, the driver core by default does not enforce any driver presence +dependencies, i.e. that one device must be bound to a driver before +another one can probe or function correctly. + +Often these two dependency types come together, so a device depends on +another one both with regards to driver presence *and* with regards to +suspend/resume and shutdown ordering. + +Device links allow representation of such dependencies in the driver core. + +In its standard or *managed* form, a device link combines *both* dependency +types: It guarantees correct suspend/resume and shutdown ordering between a +"supplier" device and its "consumer" devices, and it guarantees driver +presence on the supplier. The consumer devices are not probed before the +supplier is bound to a driver, and they're unbound before the supplier +is unbound. + +When driver presence on the supplier is irrelevant and only correct +suspend/resume and shutdown ordering is needed, the device link may +simply be set up with the ``DL_FLAG_STATELESS`` flag. In other words, +enforcing driver presence on the supplier is optional. + +Another optional feature is runtime PM integration: By setting the +``DL_FLAG_PM_RUNTIME`` flag on addition of the device link, the PM core +is instructed to runtime resume the supplier and keep it active +whenever and for as long as the consumer is runtime resumed. + +Usage +===== + +The earliest point in time when device links can be added is after +:c:func:`device_add()` has been called for the supplier and +:c:func:`device_initialize()` has been called for the consumer. + +It is legal to add them later, but care must be taken that the system +remains in a consistent state: E.g. a device link cannot be added in +the midst of a suspend/resume transition, so either commencement of +such a transition needs to be prevented with :c:func:`lock_system_sleep()`, +or the device link needs to be added from a function which is guaranteed +not to run in parallel to a suspend/resume transition, such as from a +device ``->probe`` callback or a boot-time PCI quirk. + +Another example for an inconsistent state would be a device link that +represents a driver presence dependency, yet is added from the consumer's +``->probe`` callback while the supplier hasn't started to probe yet: Had the +driver core known about the device link earlier, it wouldn't have probed the +consumer in the first place. The onus is thus on the consumer to check +presence of the supplier after adding the link, and defer probing on +non-presence. [Note that it is valid to create a link from the consumer's +``->probe`` callback while the supplier is still probing, but the consumer must +know that the supplier is functional already at the link creation time (that is +the case, for instance, if the consumer has just acquired some resources that +would not have been available had the supplier not been functional then).] + +If a device link with ``DL_FLAG_STATELESS`` set (i.e. a stateless device link) +is added in the ``->probe`` callback of the supplier or consumer driver, it is +typically deleted in its ``->remove`` callback for symmetry. That way, if the +driver is compiled as a module, the device link is added on module load and +orderly deleted on unload. The same restrictions that apply to device link +addition (e.g. exclusion of a parallel suspend/resume transition) apply equally +to deletion. Device links managed by the driver core are deleted automatically +by it. + +Several flags may be specified on device link addition, two of which +have already been mentioned above: ``DL_FLAG_STATELESS`` to express that no +driver presence dependency is needed (but only correct suspend/resume and +shutdown ordering) and ``DL_FLAG_PM_RUNTIME`` to express that runtime PM +integration is desired. + +Two other flags are specifically targeted at use cases where the device +link is added from the consumer's ``->probe`` callback: ``DL_FLAG_RPM_ACTIVE`` +can be specified to runtime resume the supplier and prevent it from suspending +before the consumer is runtime suspended. ``DL_FLAG_AUTOREMOVE_CONSUMER`` +causes the device link to be automatically purged when the consumer fails to +probe or later unbinds. + +Similarly, when the device link is added from supplier's ``->probe`` callback, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically +purged when the supplier fails to probe or later unbinds. + +If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER`` +is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core +to probe for a driver for the consumer driver on the link automatically after +a driver has been bound to the supplier device. + +Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with +``DL_FLAG_STATELESS`` are invalid and cannot be used. + +Limitations +=========== + +Driver authors should be aware that a driver presence dependency for managed +device links (i.e. when ``DL_FLAG_STATELESS`` is not specified on link addition) +may cause probing of the consumer to be deferred indefinitely. This can become +a problem if the consumer is required to probe before a certain initcall level +is reached. Worse, if the supplier driver is blacklisted or missing, the +consumer will never be probed. + +Moreover, managed device links cannot be deleted directly. They are deleted +by the driver core when they are not necessary any more in accordance with the +``DL_FLAG_AUTOREMOVE_CONSUMER`` and ``DL_FLAG_AUTOREMOVE_SUPPLIER`` flags. +However, stateless device links (i.e. device links with ``DL_FLAG_STATELESS`` +set) are expected to be removed by whoever called :c:func:`device_link_add()` +to add them with the help of either :c:func:`device_link_del()` or +:c:func:`device_link_remove()`. + +Passing ``DL_FLAG_RPM_ACTIVE`` along with ``DL_FLAG_STATELESS`` to +:c:func:`device_link_add()` may cause the PM-runtime usage counter of the +supplier device to remain nonzero after a subsequent invocation of either +:c:func:`device_link_del()` or :c:func:`device_link_remove()` to remove the +device link returned by it. This happens if :c:func:`device_link_add()` is +called twice in a row for the same consumer-supplier pair without removing the +link between these calls, in which case allowing the PM-runtime usage counter +of the supplier to drop on an attempt to remove the link may cause it to be +suspended while the consumer is still PM-runtime-active and that has to be +avoided. [To work around this limitation it is sufficient to let the consumer +runtime suspend at least once, or call :c:func:`pm_runtime_set_suspended()` for +it with PM-runtime disabled, between the :c:func:`device_link_add()` and +:c:func:`device_link_del()` or :c:func:`device_link_remove()` calls.] + +Sometimes drivers depend on optional resources. They are able to operate +in a degraded mode (reduced feature set or performance) when those resources +are not present. An example is an SPI controller that can use a DMA engine +or work in PIO mode. The controller can determine presence of the optional +resources at probe time but on non-presence there is no way to know whether +they will become available in the near future (due to a supplier driver +probing) or never. Consequently it cannot be determined whether to defer +probing or not. It would be possible to notify drivers when optional +resources become available after probing, but it would come at a high cost +for drivers as switching between modes of operation at runtime based on the +availability of such resources would be much more complex than a mechanism +based on probe deferral. In any case optional resources are beyond the +scope of device links. + +Examples +======== + +* An MMU device exists alongside a busmaster device, both are in the same + power domain. The MMU implements DMA address translation for the busmaster + device and shall be runtime resumed and kept active whenever and as long + as the busmaster device is active. The busmaster device's driver shall + not bind before the MMU is bound. To achieve this, a device link with + runtime PM integration is added from the busmaster device (consumer) + to the MMU device (supplier). The effect with regards to runtime PM + is the same as if the MMU was the parent of the master device. + + The fact that both devices share the same power domain would normally + suggest usage of a struct dev_pm_domain or struct generic_pm_domain, + however these are not independent devices that happen to share a power + switch, but rather the MMU device serves the busmaster device and is + useless without it. A device link creates a synthetic hierarchical + relationship between the devices and is thus more apt. + +* A Thunderbolt host controller comprises a number of PCIe hotplug ports + and an NHI device to manage the PCIe switch. On resume from system sleep, + the NHI device needs to re-establish PCI tunnels to attached devices + before the hotplug ports can resume. If the hotplug ports were children + of the NHI, this resume order would automatically be enforced by the + PM core, but unfortunately they're aunts. The solution is to add + device links from the hotplug ports (consumers) to the NHI device + (supplier). A driver presence dependency is not necessary for this + use case. + +* Discrete GPUs in hybrid graphics laptops often feature an HDA controller + for HDMI/DP audio. In the device hierarchy the HDA controller is a sibling + of the VGA device, yet both share the same power domain and the HDA + controller is only ever needed when an HDMI/DP display is attached to the + VGA device. A device link from the HDA controller (consumer) to the + VGA device (supplier) aptly represents this relationship. + +* ACPI allows definition of a device start order by way of _DEP objects. + A classical example is when ACPI power management methods on one device + are implemented in terms of I\ :sup:`2`\ C accesses and require a specific + I\ :sup:`2`\ C controller to be present and functional for the power + management of the device in question to work. + +* In some SoCs a functional dependency exists from display, video codec and + video processing IP cores on transparent memory access IP cores that handle + burst access and compression/decompression. + +Alternatives +============ + +* A struct dev_pm_domain can be used to override the bus, + class or device type callbacks. It is intended for devices sharing + a single on/off switch, however it does not guarantee a specific + suspend/resume ordering, this needs to be implemented separately. + It also does not by itself track the runtime PM status of the involved + devices and turn off the power switch only when all of them are runtime + suspended. Furthermore it cannot be used to enforce a specific shutdown + ordering or a driver presence dependency. + +* A struct generic_pm_domain is a lot more heavyweight than a + device link and does not allow for shutdown ordering or driver presence + dependencies. It also cannot be used on ACPI systems. + +Implementation +============== + +The device hierarchy, which -- as the name implies -- is a tree, +becomes a directed acyclic graph once device links are added. + +Ordering of these devices during suspend/resume is determined by the +dpm_list. During shutdown it is determined by the devices_kset. With +no device links present, the two lists are a flattened, one-dimensional +representations of the device tree such that a device is placed behind +all its ancestors. That is achieved by traversing the ACPI namespace +or OpenFirmware device tree top-down and appending devices to the lists +as they are discovered. + +Once device links are added, the lists need to satisfy the additional +constraint that a device is placed behind all its suppliers, recursively. +To ensure this, upon addition of the device link the consumer and the +entire sub-graph below it (all children and consumers of the consumer) +are moved to the end of the list. (Call to :c:func:`device_reorder_to_tail()` +from :c:func:`device_link_add()`.) + +To prevent introduction of dependency loops into the graph, it is +verified upon device link addition that the supplier is not dependent +on the consumer or any children or consumers of the consumer. +(Call to :c:func:`device_is_dependent()` from :c:func:`device_link_add()`.) +If that constraint is violated, :c:func:`device_link_add()` will return +``NULL`` and a ``WARNING`` will be logged. + +Notably this also prevents the addition of a device link from a parent +device to a child. However the converse is allowed, i.e. a device link +from a child to a parent. Since the driver core already guarantees +correct suspend/resume and shutdown ordering between parent and child, +such a device link only makes sense if a driver presence dependency is +needed on top of that. In this case driver authors should weigh +carefully if a device link is at all the right tool for the purpose. +A more suitable approach might be to simply use deferred probing or +add a device flag causing the parent driver to be probed before the +child one. + +State machine +============= + +.. kernel-doc:: include/linux/device.h + :functions: device_link_state + +:: + + .=============================. + | | + v | + DORMANT <=> AVAILABLE <=> CONSUMER_PROBE => ACTIVE + ^ | + | | + '============ SUPPLIER_UNBIND <============' + +* The initial state of a device link is automatically determined by + :c:func:`device_link_add()` based on the driver presence on the supplier + and consumer. If the link is created before any devices are probed, it + is set to ``DL_STATE_DORMANT``. + +* When a supplier device is bound to a driver, links to its consumers + progress to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`device_links_driver_bound()` from + :c:func:`driver_bound()`.) + +* Before a consumer device is probed, presence of supplier drivers is + verified by checking the consumer device is not in the wait_for_suppliers + list and by checking that links to suppliers are in ``DL_STATE_AVAILABLE`` + state. The state of the links is updated to ``DL_STATE_CONSUMER_PROBE``. + (Call to :c:func:`device_links_check_suppliers()` from + :c:func:`really_probe()`.) + This prevents the supplier from unbinding. + (Call to :c:func:`wait_for_device_probe()` from + :c:func:`device_links_unbind_consumers()`.) + +* If the probe fails, links to suppliers revert back to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`device_links_no_driver()` from :c:func:`really_probe()`.) + +* If the probe succeeds, links to suppliers progress to ``DL_STATE_ACTIVE``. + (Call to :c:func:`device_links_driver_bound()` from :c:func:`driver_bound()`.) + +* When the consumer's driver is later on removed, links to suppliers revert + back to ``DL_STATE_AVAILABLE``. + (Call to :c:func:`__device_links_no_driver()` from + :c:func:`device_links_driver_cleanup()`, which in turn is called from + :c:func:`__device_release_driver()`.) + +* Before a supplier's driver is removed, links to consumers that are not + bound to a driver are updated to ``DL_STATE_SUPPLIER_UNBIND``. + (Call to :c:func:`device_links_busy()` from + :c:func:`__device_release_driver()`.) + This prevents the consumers from binding. + (Call to :c:func:`device_links_check_suppliers()` from + :c:func:`really_probe()`.) + Consumers that are bound are freed from their driver; consumers that are + probing are waited for until they are done. + (Call to :c:func:`device_links_unbind_consumers()` from + :c:func:`__device_release_driver()`.) + Once all links to consumers are in ``DL_STATE_SUPPLIER_UNBIND`` state, + the supplier driver is released and the links revert to ``DL_STATE_DORMANT``. + (Call to :c:func:`device_links_driver_cleanup()` from + :c:func:`__device_release_driver()`.) + +API +=== + +See device_link_add(), device_link_del() and device_link_remove(). diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst new file mode 100644 index 000000000..f92a32d09 --- /dev/null +++ b/Documentation/driver-api/dma-buf.rst @@ -0,0 +1,366 @@ +Buffer Sharing and Synchronization (dma-buf) +============================================ + +The dma-buf subsystem provides the framework for sharing buffers for +hardware (DMA) access across multiple device drivers and subsystems, and +for synchronizing asynchronous hardware access. + +This is used, for example, by drm "prime" multi-GPU support, but is of +course not limited to GPU use cases. + +The three main components of this are: (1) dma-buf, representing a +sg_table and exposed to userspace as a file descriptor to allow passing +between devices, (2) fence, which provides a mechanism to signal when +one device has finished access, and (3) reservation, which manages the +shared or exclusive fence(s) associated with the buffer. + +Shared DMA Buffers +------------------ + +This document serves as a guide to device-driver writers on what is the dma-buf +buffer sharing API, how to use it for exporting and using shared buffers. + +Any device driver which wishes to be a part of DMA buffer sharing, can do so as +either the 'exporter' of buffers, or the 'user' or 'importer' of buffers. + +Say a driver A wants to use buffers created by driver B, then we call B as the +exporter, and A as buffer-user/importer. + +The exporter + + - implements and manages operations in :c:type:`struct dma_buf_ops + ` for the buffer, + - allows other users to share the buffer by using dma_buf sharing APIs, + - manages the details of buffer allocation, wrapped in a :c:type:`struct + dma_buf `, + - decides about the actual backing storage where this allocation happens, + - and takes care of any migration of scatterlist - for all (shared) users of + this buffer. + +The buffer-user + + - is one of (many) sharing users of the buffer. + - doesn't need to worry about how the buffer is allocated, or where. + - and needs a mechanism to get access to the scatterlist that makes up this + buffer in memory, mapped into its own address space, so it can access the + same area of memory. This interface is provided by :c:type:`struct + dma_buf_attachment `. + +Any exporters or users of the dma-buf buffer sharing framework must have a +'select DMA_SHARED_BUFFER' in their respective Kconfigs. + +Userspace Interface Notes +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mostly a DMA buffer file descriptor is simply an opaque object for userspace, +and hence the generic interface exposed is very minimal. There's a few things to +consider though: + +- Since kernel 3.12 the dma-buf FD supports the llseek system call, but only + with offset=0 and whence=SEEK_END|SEEK_SET. SEEK_SET is supported to allow + the usual size discover pattern size = SEEK_END(0); SEEK_SET(0). Every other + llseek operation will report -EINVAL. + + If llseek on dma-buf FDs isn't support the kernel will report -ESPIPE for all + cases. Userspace can use this to detect support for discovering the dma-buf + size using llseek. + +- In order to avoid fd leaks on exec, the FD_CLOEXEC flag must be set + on the file descriptor. This is not just a resource leak, but a + potential security hole. It could give the newly exec'd application + access to buffers, via the leaked fd, to which it should otherwise + not be permitted access. + + The problem with doing this via a separate fcntl() call, versus doing it + atomically when the fd is created, is that this is inherently racy in a + multi-threaded app[3]. The issue is made worse when it is library code + opening/creating the file descriptor, as the application may not even be + aware of the fd's. + + To avoid this problem, userspace must have a way to request O_CLOEXEC + flag be set when the dma-buf fd is created. So any API provided by + the exporting driver to create a dmabuf fd must provide a way to let + userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd(). + +- Memory mapping the contents of the DMA buffer is also supported. See the + discussion below on `CPU Access to DMA Buffer Objects`_ for the full details. + +- The DMA buffer FD is also pollable, see `Implicit Fence Poll Support`_ below for + details. + +- The DMA buffer FD also supports a few dma-buf-specific ioctls, see + `DMA Buffer ioctls`_ below for details. + +Basic Operation and Device DMA Access +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: dma buf device access + +CPU Access to DMA Buffer Objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: cpu access + +Implicit Fence Poll Support +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: implicit fence polling + +DMA-BUF statistics +~~~~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c + :doc: overview + +DMA Buffer ioctls +~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/uapi/linux/dma-buf.h + +DMA-BUF locking convention +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :doc: locking convention + +Kernel Functions and Structures Reference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-buf.c + :export: + +.. kernel-doc:: include/linux/dma-buf.h + :internal: + +Reservation Objects +------------------- + +.. kernel-doc:: drivers/dma-buf/dma-resv.c + :doc: Reservation Object Overview + +.. kernel-doc:: drivers/dma-buf/dma-resv.c + :export: + +.. kernel-doc:: include/linux/dma-resv.h + :internal: + +DMA Fences +---------- + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: DMA fences overview + +DMA Fence Cross-Driver Contract +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: fence cross-driver contract + +DMA Fence Signalling Annotations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: fence signalling annotation + +DMA Fence Deadline Hints +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :doc: deadline hints + +DMA Fences Functions Reference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence.c + :export: + +.. kernel-doc:: include/linux/dma-fence.h + :internal: + +DMA Fence Array +~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence-array.c + :export: + +.. kernel-doc:: include/linux/dma-fence-array.h + :internal: + +DMA Fence Chain +~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/dma-fence-chain.c + :export: + +.. kernel-doc:: include/linux/dma-fence-chain.h + :internal: + +DMA Fence unwrap +~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/dma-fence-unwrap.h + :internal: + +DMA Fence Sync File +~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/dma-buf/sync_file.c + :export: + +.. kernel-doc:: include/linux/sync_file.h + :internal: + +DMA Fence Sync File uABI +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/uapi/linux/sync_file.h + :internal: + +Indefinite DMA Fences +~~~~~~~~~~~~~~~~~~~~~ + +At various times struct dma_fence with an indefinite time until dma_fence_wait() +finishes have been proposed. Examples include: + +* Future fences, used in HWC1 to signal when a buffer isn't used by the display + any longer, and created with the screen update that makes the buffer visible. + The time this fence completes is entirely under userspace's control. + +* Proxy fences, proposed to handle &drm_syncobj for which the fence has not yet + been set. Used to asynchronously delay command submission. + +* Userspace fences or gpu futexes, fine-grained locking within a command buffer + that userspace uses for synchronization across engines or with the CPU, which + are then imported as a DMA fence for integration into existing winsys + protocols. + +* Long-running compute command buffers, while still using traditional end of + batch DMA fences for memory management instead of context preemption DMA + fences which get reattached when the compute job is rescheduled. + +Common to all these schemes is that userspace controls the dependencies of these +fences and controls when they fire. Mixing indefinite fences with normal +in-kernel DMA fences does not work, even when a fallback timeout is included to +protect against malicious userspace: + +* Only the kernel knows about all DMA fence dependencies, userspace is not aware + of dependencies injected due to memory management or scheduler decisions. + +* Only userspace knows about all dependencies in indefinite fences and when + exactly they will complete, the kernel has no visibility. + +Furthermore the kernel has to be able to hold up userspace command submission +for memory management needs, which means we must support indefinite fences being +dependent upon DMA fences. If the kernel also support indefinite fences in the +kernel like a DMA fence, like any of the above proposal would, there is the +potential for deadlocks. + +.. kernel-render:: DOT + :alt: Indefinite Fencing Dependency Cycle + :caption: Indefinite Fencing Dependency Cycle + + digraph "Fencing Cycle" { + node [shape=box bgcolor=grey style=filled] + kernel [label="Kernel DMA Fences"] + userspace [label="userspace controlled fences"] + kernel -> userspace [label="memory management"] + userspace -> kernel [label="Future fence, fence proxy, ..."] + + { rank=same; kernel userspace } + } + +This means that the kernel might accidentally create deadlocks +through memory management dependencies which userspace is unaware of, which +randomly hangs workloads until the timeout kicks in. Workloads, which from +userspace's perspective, do not contain a deadlock. In such a mixed fencing +architecture there is no single entity with knowledge of all dependencies. +Therefore preventing such deadlocks from within the kernel is not possible. + +The only solution to avoid dependencies loops is by not allowing indefinite +fences in the kernel. This means: + +* No future fences, proxy fences or userspace fences imported as DMA fences, + with or without a timeout. + +* No DMA fences that signal end of batchbuffer for command submission where + userspace is allowed to use userspace fencing or long running compute + workloads. This also means no implicit fencing for shared buffers in these + cases. + +Recoverable Hardware Page Faults Implications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Modern hardware supports recoverable page faults, which has a lot of +implications for DMA fences. + +First, a pending page fault obviously holds up the work that's running on the +accelerator and a memory allocation is usually required to resolve the fault. +But memory allocations are not allowed to gate completion of DMA fences, which +means any workload using recoverable page faults cannot use DMA fences for +synchronization. Synchronization fences controlled by userspace must be used +instead. + +On GPUs this poses a problem, because current desktop compositor protocols on +Linux rely on DMA fences, which means without an entirely new userspace stack +built on top of userspace fences, they cannot benefit from recoverable page +faults. Specifically this means implicit synchronization will not be possible. +The exception is when page faults are only used as migration hints and never to +on-demand fill a memory request. For now this means recoverable page +faults on GPUs are limited to pure compute workloads. + +Furthermore GPUs usually have shared resources between the 3D rendering and +compute side, like compute units or command submission engines. If both a 3D +job with a DMA fence and a compute workload using recoverable page faults are +pending they could deadlock: + +- The 3D workload might need to wait for the compute job to finish and release + hardware resources first. + +- The compute workload might be stuck in a page fault, because the memory + allocation is waiting for the DMA fence of the 3D workload to complete. + +There are a few options to prevent this problem, one of which drivers need to +ensure: + +- Compute workloads can always be preempted, even when a page fault is pending + and not yet repaired. Not all hardware supports this. + +- DMA fence workloads and workloads which need page fault handling have + independent hardware resources to guarantee forward progress. This could be + achieved through e.g. through dedicated engines and minimal compute unit + reservations for DMA fence workloads. + +- The reservation approach could be further refined by only reserving the + hardware resources for DMA fence workloads when they are in-flight. This must + cover the time from when the DMA fence is visible to other threads up to + moment when fence is completed through dma_fence_signal(). + +- As a last resort, if the hardware provides no useful reservation mechanics, + all workloads must be flushed from the GPU when switching between jobs + requiring DMA fences or jobs requiring page fault handling: This means all DMA + fences must complete before a compute job with page fault handling can be + inserted into the scheduler queue. And vice versa, before a DMA fence can be + made visible anywhere in the system, all compute workloads must be preempted + to guarantee all pending GPU page faults are flushed. + +- Only a fairly theoretical option would be to untangle these dependencies when + allocating memory to repair hardware page faults, either through separate + memory blocks or runtime tracking of the full dependency graph of all DMA + fences. This results very wide impact on the kernel, since resolving the page + on the CPU side can itself involve a page fault. It is much more feasible and + robust to limit the impact of handling hardware page faults to the specific + driver. + +Note that workloads that run on independent hardware like copy engines or other +GPUs do not have any impact. This allows us to keep using DMA fences internally +in the kernel even for resolving hardware page faults, e.g. by using copy +engines to clear or copy memory needed to resolve the page fault. + +In some ways this page fault problem is a special case of the `Infinite DMA +Fences` discussions: Infinite fences from compute workloads are allowed to +depend on DMA fences, but not the other way around. And not even the page fault +problem is new, because some other CPU thread in userspace might +hit a page fault which holds up a userspace fence - supporting page faults on +GPUs doesn't anything fundamentally new. diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst new file mode 100644 index 000000000..ecf139f73 --- /dev/null +++ b/Documentation/driver-api/dmaengine/client.rst @@ -0,0 +1,379 @@ +==================== +DMA Engine API Guide +==================== + +Vinod Koul + +.. note:: For DMA Engine usage in async_tx please see: + ``Documentation/crypto/async-tx-api.rst`` + + +Below is a guide to device driver writers on how to use the Slave-DMA API of the +DMA Engine. This is applicable only for slave DMA usage only. + +DMA usage +========= + +The slave DMA usage consists of following steps: + +- Allocate a DMA slave channel + +- Set slave and controller specific parameters + +- Get a descriptor for transaction + +- Submit the transaction + +- Issue pending requests and wait for callback notification + +The details of these operations are: + +1. Allocate a DMA slave channel + + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. + To request a channel dma_request_chan() API is used. + + Interface: + + .. code-block:: c + + struct dma_chan *dma_request_chan(struct device *dev, const char *name); + + Which will find and return the ``name`` DMA channel associated with the 'dev' + device. The association is done via DT, ACPI or board file based + dma_slave_map matching table. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. + +2. Set slave and controller specific parameters + + Next step is always to pass some specific information to the DMA + driver. Most of the generic information which a slave DMA can use + is in struct dma_slave_config. This allows the clients to specify + DMA direction, DMA addresses, bus widths, DMA burst lengths etc + for the peripheral. + + If some DMA controllers have more parameters to be sent then they + should try to embed struct dma_slave_config in their controller + specific structure. That gives flexibility to client to pass more + parameters, if required. + + Interface: + + .. code-block:: c + + int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + + Please see the dma_slave_config structure definition in dmaengine.h + for a detailed explanation of the struct members. Please note + that the 'direction' member will be going away as it duplicates the + direction given in the prepare call. + +3. Get a descriptor for transaction + + For slave usage the various modes of slave transfers supported by the + DMA-engine are: + + - slave_sg: DMA a list of scatter gather buffers from/to a peripheral + + - dma_cyclic: Perform a cyclic DMA operation from/to a peripheral till the + operation is explicitly stopped. + + - interleaved_dma: This is common to Slave as well as M2M clients. For slave + address of devices' fifo could be already known to the driver. + Various types of operations could be expressed by setting + appropriate values to the 'dma_interleaved_template' members. Cyclic + interleaved DMA transfers are also possible if supported by the channel by + setting the DMA_PREP_REPEAT transfer flag. + + A non-NULL return of this transfer API represents a "descriptor" for + the given transaction. + + Interface: + + .. code-block:: c + + struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags); + + struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction); + + struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( + struct dma_chan *chan, struct dma_interleaved_template *xt, + unsigned long flags); + + The peripheral driver is expected to have mapped the scatterlist for + the DMA operation prior to calling dmaengine_prep_slave_sg(), and must + keep the scatterlist mapped until the DMA operation has completed. + The scatterlist must be mapped using the DMA struct device. + If a mapping needs to be synchronized later, dma_sync_*_for_*() must be + called using the DMA struct device, too. + So, normal setup should look like this: + + .. code-block:: c + + struct device *dma_dev = dmaengine_get_dma_device(chan); + + nr_sg = dma_map_sg(dma_dev, sgl, sg_len); + if (nr_sg == 0) + /* error */ + + desc = dmaengine_prep_slave_sg(chan, sgl, nr_sg, direction, flags); + + Once a descriptor has been obtained, the callback information can be + added and the descriptor must then be submitted. Some DMA engine + drivers may hold a spinlock between a successful preparation and + submission so it is important that these two operations are closely + paired. + + .. note:: + + Although the async_tx API specifies that completion callback + routines cannot submit any new operations, this is not the + case for slave/cyclic DMA. + + For slave DMA, the subsequent transaction may not be available + for submission prior to callback function being invoked, so + slave DMA callbacks are permitted to prepare and submit a new + transaction. + + For cyclic DMA, a callback function may wish to terminate the + DMA via dmaengine_terminate_async(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a + deadlock. + + Note that callbacks will always be invoked from the DMA + engines tasklet, never from interrupt context. + + **Optional: per descriptor metadata** + + DMAengine provides two ways for metadata support. + + DESC_METADATA_CLIENT + + The metadata buffer is allocated/provided by the client driver and it is + attached to the descriptor. + + .. code-block:: c + + int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc, + void *data, size_t len); + + DESC_METADATA_ENGINE + + The metadata buffer is allocated/managed by the DMA driver. The client + driver can ask for the pointer, maximum size and the currently used size of + the metadata and can directly update or read it. + + Because the DMA driver manages the memory area containing the metadata, + clients must make sure that they do not try to access or get the pointer + after their transfer completion callback has run for the descriptor. + If no completion callback has been defined for the transfer, then the + metadata must not be accessed after issue_pending. + In other words: if the aim is to read back metadata after the transfer is + completed, then the client must use completion callback. + + .. code-block:: c + + void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc, + size_t *payload_len, size_t *max_len); + + int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc, + size_t payload_len); + + Client drivers can query if a given mode is supported with: + + .. code-block:: c + + bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan, + enum dma_desc_metadata_mode mode); + + Depending on the used mode client drivers must follow different flow. + + DESC_METADATA_CLIENT + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + construct the metadata in the client's buffer + 2. use dmaengine_desc_attach_metadata() to attach the buffer to the + descriptor + 3. submit the transfer + + - DMA_DEV_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. use dmaengine_desc_attach_metadata() to attach the buffer to the + descriptor + 3. submit the transfer + 4. when the transfer is completed, the metadata should be available in the + attached buffer + + DESC_METADATA_ENGINE + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. use dmaengine_desc_get_metadata_ptr() to get the pointer to the + engine's metadata area + 3. update the metadata at the pointer + 4. use dmaengine_desc_set_metadata_len() to tell the DMA engine the + amount of data the client has placed into the metadata buffer + 5. submit the transfer + + - DMA_DEV_TO_MEM: + + 1. prepare the descriptor (dmaengine_prep_*) + 2. submit the transfer + 3. on transfer completion, use dmaengine_desc_get_metadata_ptr() to get + the pointer to the engine's metadata area + 4. read out the metadata from the pointer + + .. note:: + + When DESC_METADATA_ENGINE mode is used the metadata area for the descriptor + is no longer valid after the transfer has been completed (valid up to the + point when the completion callback returns if used). + + Mixed use of DESC_METADATA_CLIENT / DESC_METADATA_ENGINE is not allowed, + client drivers must use either of the modes per descriptor. + +4. Submit the transaction + + Once the descriptor has been prepared and the callback information + added, it must be placed on the DMA engine drivers pending queue. + + Interface: + + .. code-block:: c + + dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) + + This returns a cookie can be used to check the progress of DMA engine + activity via other DMA engine calls not covered in this document. + + dmaengine_submit() will not start the DMA operation, it merely adds + it to the pending queue. For this, see step 5, dma_async_issue_pending. + + .. note:: + + After calling ``dmaengine_submit()`` the submitted transfer descriptor + (``struct dma_async_tx_descriptor``) belongs to the DMA engine. + Consequently, the client must consider invalid the pointer to that + descriptor. + +5. Issue pending DMA requests and wait for callback notification + + The transactions in the pending queue can be activated by calling the + issue_pending API. If channel is idle then the first transaction in + queue is started and subsequent ones queued up. + + On completion of each DMA operation, the next in queue is started and + a tasklet triggered. The tasklet will then call the client driver + completion callback routine for notification, if set. + + Interface: + + .. code-block:: c + + void dma_async_issue_pending(struct dma_chan *chan); + +Further APIs +------------ + +1. Terminate APIs + + .. code-block:: c + + int dmaengine_terminate_sync(struct dma_chan *chan) + int dmaengine_terminate_async(struct dma_chan *chan) + int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */ + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + + Two variants of this function are available. + + dmaengine_terminate_async() might not wait until the DMA has been fully + stopped or until any running complete callbacks have finished. But it is + possible to call dmaengine_terminate_async() from atomic context or from + within a complete callback. dmaengine_synchronize() must be called before it + is safe to free the memory accessed by the DMA transfer or free resources + accessed from within the complete callback. + + dmaengine_terminate_sync() will wait for the transfer and any running + complete callbacks to finish before it returns. But the function must not be + called from atomic context or from within a complete callback. + + dmaengine_terminate_all() is deprecated and should not be used in new code. + +2. Pause API + + .. code-block:: c + + int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. + +3. Resume API + + .. code-block:: c + + int dmaengine_resume(struct dma_chan *chan) + + Resume a previously paused DMA channel. It is invalid to resume a + channel which is not currently paused. + +4. Check Txn complete + + .. code-block:: c + + enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + + This can be used to check the status of the channel. Please see + the documentation in include/linux/dmaengine.h for a more complete + description of this API. + + This can be used in conjunction with dma_async_is_complete() and + the cookie returned from dmaengine_submit() to check for + completion of a specific DMA transaction. + + .. note:: + + Not all DMA engine drivers can return reliable information for + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all()) the channel before + using this API. + +5. Synchronize termination API + + .. code-block:: c + + void dmaengine_synchronize(struct dma_chan *chan) + + Synchronize the termination of the DMA channel to the current context. + + This function should be used after dmaengine_terminate_async() to synchronize + the termination of the DMA channel to the current context. The function will + wait for the transfer and any running complete callbacks to finish before it + returns. + + If dmaengine_terminate_async() is used to stop the DMA channel this function + must be called before it is safe to free memory accessed by previously + submitted descriptors or to free any resources accessed within the complete + callback of previously submitted descriptors. + + The behavior of this function is undefined if dma_async_issue_pending() has + been called between dmaengine_terminate_async() and this function. diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst new file mode 100644 index 000000000..e2a63cefd --- /dev/null +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -0,0 +1,232 @@ +============== +DMA Test Guide +============== + +Andy Shevchenko + +This small document introduces how to test DMA drivers using dmatest module. + +The dmatest module tests DMA memcpy, memset, XOR and RAID6 P+Q operations using +various lengths and various offsets into the source and destination buffers. It +will initialize both buffers with a repeatable pattern and verify that the DMA +engine copies the requested region and nothing more. It will also verify that +the bytes aren't swapped around, and that the source buffer isn't modified. + +The dmatest module can be configured to test a specific channel. It can also +test multiple channels at the same time, and it can start multiple threads +competing for the same channel. + +.. note:: + The test suite works only on the channels that have at least one + capability of the following: DMA_MEMCPY (memory-to-memory), DMA_MEMSET + (const-to-memory or memory-to-memory, when emulated), DMA_XOR, DMA_PQ. + +.. note:: + In case of any related questions use the official mailing list + dmaengine@vger.kernel.org. + +Part 1 - How to build the test module +===================================== + +The menuconfig contains an option that could be found by following path: + + Device Drivers -> DMA Engine support -> DMA Test client + +In the configuration file the option called CONFIG_DMATEST. The dmatest could +be built as module or inside kernel. Let's consider those cases. + +Part 2 - When dmatest is built as a module +========================================== + +Example of usage:: + + % modprobe dmatest timeout=2000 iterations=1 channel=dma0chan0 run=1 + +...or:: + + % modprobe dmatest + % echo 2000 > /sys/module/dmatest/parameters/timeout + % echo 1 > /sys/module/dmatest/parameters/iterations + % echo dma0chan0 > /sys/module/dmatest/parameters/channel + % echo 1 > /sys/module/dmatest/parameters/run + +...or on the kernel command line:: + + dmatest.timeout=2000 dmatest.iterations=1 dmatest.channel=dma0chan0 dmatest.run=1 + +Example of multi-channel test usage (new in the 5.0 kernel):: + + % modprobe dmatest + % echo 2000 > /sys/module/dmatest/parameters/timeout + % echo 1 > /sys/module/dmatest/parameters/iterations + % echo dma0chan0 > /sys/module/dmatest/parameters/channel + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + % echo 1 > /sys/module/dmatest/parameters/run + +.. note:: + For all tests, starting in the 5.0 kernel, either single- or multi-channel, + the channel parameter(s) must be set after all other parameters. It is at + that time that the existing parameter values are acquired for use by the + thread(s). All other parameters are shared. Therefore, if changes are made + to any of the other parameters, and an additional channel specified, the + (shared) parameters used for all threads will use the new values. + After the channels are specified, each thread is set as pending. All threads + begin execution when the run parameter is set to 1. + +.. hint:: + A list of available channels can be found by running the following command:: + + % ls -1 /sys/class/dma/ + +Once started a message like " dmatest: Added 1 threads using dma0chan0" is +emitted. A thread for that specific channel is created and is now pending, the +pending thread is started once run is to 1. + +Note that running a new test will not stop any in progress test. + +The following command returns the state of the test. :: + + % cat /sys/module/dmatest/parameters/run + +To wait for test completion userspace can poll 'run' until it is false, or use +the wait parameter. Specifying 'wait=1' when loading the module causes module +initialization to pause until a test run has completed, while reading +/sys/module/dmatest/parameters/wait waits for any running test to complete +before returning. For example, the following scripts wait for 42 tests +to complete before exiting. Note that if 'iterations' is set to 'infinite' then +waiting is disabled. + +Example:: + + % modprobe dmatest run=1 iterations=42 wait=1 + % modprobe -r dmatest + +...or:: + + % modprobe dmatest run=1 iterations=42 + % cat /sys/module/dmatest/parameters/wait + % modprobe -r dmatest + +Part 3 - When built-in in the kernel +==================================== + +The module parameters that is supplied to the kernel command line will be used +for the first performed test. After user gets a control, the test could be +re-run with the same or different parameters. For the details see the above +section `Part 2 - When dmatest is built as a module`_. + +In both cases the module parameters are used as the actual values for the test +case. You always could check them at run-time by running :: + + % grep -H . /sys/module/dmatest/parameters/* + +Part 4 - Gathering the test results +=================================== + +Test results are printed to the kernel log buffer with the format:: + + "dmatest: result : : '' with src_off= dst_off= len= ()" + +Example of output:: + + % dmesg | tail -n 1 + dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) + +The message format is unified across the different types of errors. A +number in the parentheses represents additional information, e.g. error +code, error counter, or status. A test thread also emits a summary line at +completion listing the number of tests executed, number that failed, and a +result code. + +Example:: + + % dmesg | tail -n 1 + dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0) + +The details of a data miscompare error are also emitted, but do not follow the +above format. + +Part 5 - Handling channel allocation +==================================== + +Allocating Channels +------------------- + +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. + +Example:: + + % echo 1 > /sys/module/dmatest/parameters/run + dmatest: No channels configured, continue with any + +Channels are registered using the "channel" parameter. Channels can be requested by their +name, once requested, the channel is registered and a pending thread is added to the test list. + +Example:: + + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan2 + +More channels can be added by repeating the example above. +Reading back the channel parameter will return the name of last channel that was added successfully. + +Example:: + + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan1 + % echo dma0chan2 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan2 + % cat /sys/module/dmatest/parameters/channel + dma0chan2 + +Another method of requesting channels is to request a channel with an empty string, Doing so +will request all channels available to be tested: + +Example:: + + % echo "" > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan0 + dmatest: Added 1 threads using dma0chan3 + dmatest: Added 1 threads using dma0chan4 + dmatest: Added 1 threads using dma0chan5 + dmatest: Added 1 threads using dma0chan6 + dmatest: Added 1 threads using dma0chan7 + dmatest: Added 1 threads using dma0chan8 + +At any point during the test configuration, reading the "test_list" parameter will +print the list of currently pending tests. + +Example:: + + % cat /sys/module/dmatest/parameters/test_list + dmatest: 1 threads using dma0chan0 + dmatest: 1 threads using dma0chan3 + dmatest: 1 threads using dma0chan4 + dmatest: 1 threads using dma0chan5 + dmatest: 1 threads using dma0chan6 + dmatest: 1 threads using dma0chan7 + dmatest: 1 threads using dma0chan8 + +Note: Channels will have to be configured for each test run as channel configurations do not +carry across to the next test run. + +Releasing Channels +------------------- + +Channels can be freed by setting run to 0. + +Example:: + + % echo dma0chan1 > /sys/module/dmatest/parameters/channel + dmatest: Added 1 threads using dma0chan1 + % cat /sys/class/dma/dma0chan1/in_use + 1 + % echo 0 > /sys/module/dmatest/parameters/run + % cat /sys/class/dma/dma0chan1/in_use + 0 + +Channels allocated by previous test runs are automatically freed when a new +channel is requested after completing a successful test run. diff --git a/Documentation/driver-api/dmaengine/index.rst b/Documentation/driver-api/dmaengine/index.rst new file mode 100644 index 000000000..bdc45d8b4 --- /dev/null +++ b/Documentation/driver-api/dmaengine/index.rst @@ -0,0 +1,55 @@ +======================= +DMAEngine documentation +======================= + +DMAEngine documentation provides documents for various aspects of DMAEngine +framework. + +DMAEngine development documentation +----------------------------------- + +This book helps with DMAengine internal APIs and guide for DMAEngine device +driver writers. + +.. toctree:: + :maxdepth: 1 + + provider + +DMAEngine client documentation +------------------------------ + +This book is a guide to device driver writers on how to use the Slave-DMA +API of the DMAEngine. This is applicable only for slave DMA usage only. + +.. toctree:: + :maxdepth: 1 + + client + +DMA Test documentation +---------------------- + +This book introduces how to test DMA drivers using dmatest module. + +.. toctree:: + :maxdepth: 1 + + dmatest + +PXA DMA documentation +---------------------- + +This book adds some notes about PXA DMA + +.. toctree:: + :maxdepth: 1 + + pxa_dma + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst new file mode 100644 index 000000000..ceac2a300 --- /dev/null +++ b/Documentation/driver-api/dmaengine/provider.rst @@ -0,0 +1,647 @@ +================================== +DMAengine controller documentation +================================== + +Hardware Introduction +===================== + +Most of the Slave DMA controllers have the same general principles of +operations. + +They have a given number of channels to use for the DMA transfers, and +a given number of requests lines. + +Requests and channels are pretty much orthogonal. Channels can be used +to serve several to any requests. To simplify, channels are the +entities that will be doing the copy, and requests what endpoints are +involved. + +The request lines actually correspond to physical lines going from the +DMA-eligible devices to the controller itself. Whenever the device +will want to start a transfer, it will assert a DMA request (DRQ) by +asserting that request line. + +A very simple DMA controller would only take into account a single +parameter: the transfer size. At each clock cycle, it would transfer a +byte of data from one buffer to another, until the transfer size has +been reached. + +That wouldn't work well in the real world, since slave devices might +require a specific number of bits to be transferred in a single +cycle. For example, we may want to transfer as much data as the +physical bus allows to maximize performances when doing a simple +memory copy operation, but our audio device could have a narrower FIFO +that requires data to be written exactly 16 or 24 bits at a time. This +is why most if not all of the DMA controllers can adjust this, using a +parameter called the transfer width. + +Moreover, some DMA controllers, whenever the RAM is used as a source +or destination, can group the reads or writes in memory into a buffer, +so instead of having a lot of small memory accesses, which is not +really efficient, you'll get several bigger transfers. This is done +using a parameter called the burst size, that defines how many single +reads/writes it's allowed to do without the controller splitting the +transfer into smaller sub-transfers. + +Our theoretical DMA controller would then only be able to do transfers +that involve a single contiguous block of data. However, some of the +transfers we usually have are not, and want to copy data from +non-contiguous buffers to a contiguous buffer, which is called +scatter-gather. + +DMAEngine, at least for mem2dev transfers, require support for +scatter-gather. So we're left with two cases here: either we have a +quite simple DMA controller that doesn't support it, and we'll have to +implement it in software, or we have a more advanced DMA controller, +that implements in hardware scatter-gather. + +The latter are usually programmed using a collection of chunks to +transfer, and whenever the transfer is started, the controller will go +over that collection, doing whatever we programmed there. + +This collection is usually either a table or a linked list. You will +then push either the address of the table and its number of elements, +or the first item of the list to one channel of the DMA controller, +and whenever a DRQ will be asserted, it will go through the collection +to know where to fetch the data from. + +Either way, the format of this collection is completely dependent on +your hardware. Each DMA controller will require a different structure, +but all of them will require, for every chunk, at least the source and +destination addresses, whether it should increment these addresses or +not and the three parameters we saw earlier: the burst size, the +transfer width and the transfer size. + +The one last thing is that usually, slave devices won't issue DRQ by +default, and you have to enable this in your slave device driver first +whenever you're willing to use DMA. + +These were just the general memory-to-memory (also called mem2mem) or +memory-to-device (mem2dev) kind of transfers. Most devices often +support other kind of transfers or memory operations that dmaengine +support and will be detailed later in this document. + +DMA Support in Linux +==================== + +Historically, DMA controller drivers have been implemented using the +async TX API, to offload operations such as memory copy, XOR, +cryptography, etc., basically any memory to memory operation. + +Over time, the need for memory to device transfers arose, and +dmaengine was extended. Nowadays, the async TX API is written as a +layer on top of dmaengine, and acts as a client. Still, dmaengine +accommodates that API in some cases, and made some design choices to +ensure that it stayed compatible. + +For more information on the Async TX API, please look the relevant +documentation file in Documentation/crypto/async-tx-api.rst. + +DMAEngine APIs +============== + +``struct dma_device`` Initialization +------------------------------------ + +Just like any other kernel framework, the whole DMAEngine registration +relies on the driver filling a structure and registering against the +framework. In our case, that structure is dma_device. + +The first thing you need to do in your driver is to allocate this +structure. Any of the usual memory allocators will do, but you'll also +need to initialize a few fields in there: + +- ``channels``: should be initialized as a list using the + INIT_LIST_HEAD macro for example + +- ``src_addr_widths``: + should contain a bitmask of the supported source transfer width + +- ``dst_addr_widths``: + should contain a bitmask of the supported destination transfer width + +- ``directions``: + should contain a bitmask of the supported slave directions + (i.e. excluding mem2mem transfers) + +- ``residue_granularity``: + granularity of the transfer residue reported to dma_set_residue. + This can be either: + + - Descriptor: + your device doesn't support any kind of residue + reporting. The framework will only know that a particular + transaction descriptor is done. + + - Segment: + your device is able to report which chunks have been transferred + + - Burst: + your device is able to report which burst have been transferred + +- ``dev``: should hold the pointer to the ``struct device`` associated + to your current driver instance. + +Supported transaction types +--------------------------- + +The next thing you need is to set which transaction types your device +(and driver) supports. + +Our ``dma_device structure`` has a field called cap_mask that holds the +various types of transaction supported, and you need to modify this +mask using the dma_cap_set function, with various flags depending on +transaction types you support as an argument. + +All those capabilities are defined in the ``dma_transaction_type enum``, +in ``include/linux/dmaengine.h`` + +Currently, the types available are: + +- DMA_MEMCPY + + - The device is able to do memory to memory copies + + - No matter what the overall size of the combined chunks for source and + destination is, only as many bytes as the smallest of the two will be + transmitted. That means the number and size of the scatter-gather buffers in + both lists need not be the same, and that the operation functionally is + equivalent to a ``strncpy`` where the ``count`` argument equals the smallest + total size of the two scatter-gather list buffers. + + - It's usually used for copying pixel data between host memory and + memory-mapped GPU device memory, such as found on modern PCI video graphics + cards. The most immediate example is the OpenGL API function + ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer + from local device memory onto host memory. + +- DMA_XOR + + - The device is able to perform XOR operations on memory areas + + - Used to accelerate XOR intensive tasks, such as RAID5 + +- DMA_XOR_VAL + + - The device is able to perform parity check using the XOR + algorithm against a memory buffer. + +- DMA_PQ + + - The device is able to perform RAID6 P+Q computations, P being a + simple XOR, and Q being a Reed-Solomon algorithm. + +- DMA_PQ_VAL + + - The device is able to perform parity check using RAID6 P+Q + algorithm against a memory buffer. + +- DMA_MEMSET + + - The device is able to fill memory with the provided pattern + + - The pattern is treated as a single byte signed value. + +- DMA_INTERRUPT + + - The device is able to trigger a dummy transfer that will + generate periodic interrupts + + - Used by the client drivers to register a callback that will be + called on a regular basis through the DMA controller interrupt + +- DMA_PRIVATE + + - The devices only supports slave transfers, and as such isn't + available for async transfers. + +- DMA_ASYNC_TX + + - Must not be set by the device, and will be set by the framework + if needed + + - TODO: What is it about? + +- DMA_SLAVE + + - The device can handle device to memory transfers, including + scatter-gather transfers. + + - While in the mem2mem case we were having two distinct types to + deal with a single chunk to copy or a collection of them, here, + we just have a single transaction type that is supposed to + handle both. + + - If you want to transfer a single contiguous memory buffer, + simply build a scatter list with only one item. + +- DMA_CYCLIC + + - The device can handle cyclic transfers. + + - A cyclic transfer is a transfer where the chunk collection will + loop over itself, with the last item pointing to the first. + + - It's usually used for audio transfers, where you want to operate + on a single ring buffer that you will fill with your audio data. + +- DMA_INTERLEAVE + + - The device supports interleaved transfer. + + - These transfers can transfer data from a non-contiguous buffer + to a non-contiguous buffer, opposed to DMA_SLAVE that can + transfer data from a non-contiguous data set to a continuous + destination buffer. + + - It's usually used for 2d content transfers, in which case you + want to transfer a portion of uncompressed data directly to the + display to print it + +- DMA_COMPLETION_NO_ORDER + + - The device does not support in order completion. + + - The driver should return DMA_OUT_OF_ORDER for device_tx_status if + the device is setting this capability. + + - All cookie tracking and checking API should be treated as invalid if + the device exports this capability. + + - At this point, this is incompatible with polling option for dmatest. + + - If this cap is set, the user is recommended to provide an unique + identifier for each descriptor sent to the DMA device in order to + properly track the completion. + +- DMA_REPEAT + + - The device supports repeated transfers. A repeated transfer, indicated by + the DMA_PREP_REPEAT transfer flag, is similar to a cyclic transfer in that + it gets automatically repeated when it ends, but can additionally be + replaced by the client. + + - This feature is limited to interleaved transfers, this flag should thus not + be set if the DMA_INTERLEAVE flag isn't set. This limitation is based on + the current needs of DMA clients, support for additional transfer types + should be added in the future if and when the need arises. + +- DMA_LOAD_EOT + + - The device supports replacing repeated transfers at end of transfer (EOT) + by queuing a new transfer with the DMA_PREP_LOAD_EOT flag set. + + - Support for replacing a currently running transfer at another point (such + as end of burst instead of end of transfer) will be added in the future + based on DMA clients needs, if and when the need arises. + +These various types will also affect how the source and destination +addresses change over time. + +Addresses pointing to RAM are typically incremented (or decremented) +after each transfer. In case of a ring buffer, they may loop +(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO) +are typically fixed. + +Per descriptor metadata support +------------------------------- +Some data movement architecture (DMA controller and peripherals) uses metadata +associated with a transaction. The DMA controller role is to transfer the +payload and the metadata alongside. +The metadata itself is not used by the DMA engine itself, but it contains +parameters, keys, vectors, etc for peripheral or from the peripheral. + +The DMAengine framework provides a generic ways to facilitate the metadata for +descriptors. Depending on the architecture the DMA driver can implement either +or both of the methods and it is up to the client driver to choose which one +to use. + +- DESC_METADATA_CLIENT + + The metadata buffer is allocated/provided by the client driver and it is + attached (via the dmaengine_desc_attach_metadata() helper to the descriptor. + + From the DMA driver the following is expected for this mode: + + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM + + The data from the provided metadata buffer should be prepared for the DMA + controller to be sent alongside of the payload data. Either by copying to a + hardware descriptor, or highly coupled packet. + + - DMA_DEV_TO_MEM + + On transfer completion the DMA driver must copy the metadata to the client + provided metadata buffer before notifying the client about the completion. + After the transfer completion, DMA drivers must not touch the metadata + buffer provided by the client. + +- DESC_METADATA_ENGINE + + The metadata buffer is allocated/managed by the DMA driver. The client driver + can ask for the pointer, maximum size and the currently used size of the + metadata and can directly update or read it. dmaengine_desc_get_metadata_ptr() + and dmaengine_desc_set_metadata_len() is provided as helper functions. + + From the DMA driver the following is expected for this mode: + + - get_metadata_ptr() + + Should return a pointer for the metadata buffer, the maximum size of the + metadata buffer and the currently used / valid (if any) bytes in the buffer. + + - set_metadata_len() + + It is called by the clients after it have placed the metadata to the buffer + to let the DMA driver know the number of valid bytes provided. + + Note: since the client will ask for the metadata pointer in the completion + callback (in DMA_DEV_TO_MEM case) the DMA driver must ensure that the + descriptor is not freed up prior the callback is called. + +Device operations +----------------- + +Our dma_device structure also requires a few function pointers in +order to implement the actual logic, now that we described what +operations we were able to perform. + +The functions that we have to fill in there, and hence have to +implement, obviously depend on the transaction types you reported as +supported. + +- ``device_alloc_chan_resources`` + +- ``device_free_chan_resources`` + + - These functions will be called whenever a driver will call + ``dma_request_channel`` or ``dma_release_channel`` for the first/last + time on the channel associated to that driver. + + - They are in charge of allocating/freeing all the needed + resources in order for that channel to be useful for your driver. + + - These functions can sleep. + +- ``device_prep_dma_*`` + + - These functions are matching the capabilities you registered + previously. + + - These functions all take the buffer or the scatterlist relevant + for the transfer being prepared, and should create a hardware + descriptor or a list of hardware descriptors from it + + - These functions can be called from an interrupt context + + - Any allocation you might do should be using the GFP_NOWAIT + flag, in order not to potentially sleep, but without depleting + the emergency pool either. + + - Drivers should try to pre-allocate any memory they might need + during the transfer setup at probe time to avoid putting to + much pressure on the nowait allocator. + + - It should return a unique instance of the + ``dma_async_tx_descriptor structure``, that further represents this + particular transfer. + + - This structure can be initialized using the function + ``dma_async_tx_descriptor_init``. + + - You'll also need to set two fields in this structure: + + - flags: + TODO: Can it be modified by the driver itself, or + should it be always the flags passed in the arguments + + - tx_submit: A pointer to a function you have to implement, + that is supposed to push the current transaction descriptor to a + pending queue, waiting for issue_pending to be called. + + - In this structure the function pointer callback_result can be + initialized in order for the submitter to be notified that a + transaction has completed. In the earlier code the function pointer + callback has been used. However it does not provide any status to the + transaction and will be deprecated. The result structure defined as + ``dmaengine_result`` that is passed in to callback_result + has two fields: + + - result: This provides the transfer result defined by + ``dmaengine_tx_result``. Either success or some error condition. + + - residue: Provides the residue bytes of the transfer for those that + support residue. + +- ``device_issue_pending`` + + - Takes the first transaction descriptor in the pending queue, + and starts the transfer. Whenever that transfer is done, it + should move to the next transaction in the list. + + - This function can be called in an interrupt context + +- ``device_tx_status`` + + - Should report the bytes left to go over on the given channel + + - Should only care about the transaction descriptor passed as + argument, not the currently active one on a given channel + + - The tx_state argument might be NULL + + - Should use dma_set_residue to report it + + - In the case of a cyclic transfer, it should only take into + account the total size of the cyclic buffer. + + - Should return DMA_OUT_OF_ORDER if the device does not support in order + completion and is completing the operation out of order. + + - This function can be called in an interrupt context. + +- device_config + + - Reconfigures the channel with the configuration given as argument + + - This command should NOT perform synchronously, or on any + currently queued transfers, but only on subsequent ones + + - In this case, the function will receive a ``dma_slave_config`` + structure pointer as an argument, that will detail which + configuration to use. + + - Even though that structure contains a direction field, this + field is deprecated in favor of the direction argument given to + the prep_* functions + + - This call is mandatory for slave operations only. This should NOT be + set or expected to be set for memcpy operations. + If a driver support both, it should use this call for slave + operations only and not for memcpy ones. + +- device_pause + + - Pauses a transfer on the channel + + - This command should operate synchronously on the channel, + pausing right away the work of the given channel + +- device_resume + + - Resumes a transfer on the channel + + - This command should operate synchronously on the channel, + resuming right away the work of the given channel + +- device_terminate_all + + - Aborts all the pending and ongoing transfers on the channel + + - For aborted transfers the complete callback should not be called + + - Can be called from atomic context or from within a complete + callback of a descriptor. Must not sleep. Drivers must be able + to handle this correctly. + + - Termination may be asynchronous. The driver does not have to + wait until the currently active transfer has completely stopped. + See device_synchronize. + +- device_synchronize + + - Must synchronize the termination of a channel to the current + context. + + - Must make sure that memory for previously submitted + descriptors is no longer accessed by the DMA controller. + + - Must make sure that all complete callbacks for previously + submitted descriptors have finished running and none are + scheduled to run. + + - May sleep. + + +Misc notes +========== + +(stuff that should be documented, but don't really know +where to put them) + +``dma_run_dependencies`` + +- Should be called at the end of an async TX transfer, and can be + ignored in the slave transfers case. + +- Makes sure that dependent operations are run before marking it + as complete. + +dma_cookie_t + +- it's a DMA transaction ID that will increment over time. + +- Not really relevant any more since the introduction of ``virt-dma`` + that abstracts it away. + +DMA_CTRL_ACK + +- If clear, the descriptor cannot be reused by provider until the + client acknowledges receipt, i.e. has a chance to establish any + dependency chains + +- This can be acked by invoking async_tx_ack() + +- If set, does not mean descriptor can be reused + +DMA_CTRL_REUSE + +- If set, the descriptor can be reused after being completed. It should + not be freed by provider if this flag is set. + +- The descriptor should be prepared for reuse by invoking + ``dmaengine_desc_set_reuse()`` which will set DMA_CTRL_REUSE. + +- ``dmaengine_desc_set_reuse()`` will succeed only when channel support + reusable descriptor as exhibited by capabilities + +- As a consequence, if a device driver wants to skip the + ``dma_map_sg()`` and ``dma_unmap_sg()`` in between 2 transfers, + because the DMA'd data wasn't used, it can resubmit the transfer right after + its completion. + +- Descriptor can be freed in few ways + + - Clearing DMA_CTRL_REUSE by invoking + ``dmaengine_desc_clear_reuse()`` and submitting for last txn + + - Explicitly invoking ``dmaengine_desc_free()``, this can succeed only + when DMA_CTRL_REUSE is already set + + - Terminating the channel + +- DMA_PREP_CMD + + - If set, the client driver tells DMA controller that passed data in DMA + API is command data. + + - Interpretation of command data is DMA controller specific. It can be + used for issuing commands to other peripherals/register reads/register + writes for which the descriptor should be in different format from + normal data descriptors. + +- DMA_PREP_REPEAT + + - If set, the transfer will be automatically repeated when it ends until a + new transfer is queued on the same channel with the DMA_PREP_LOAD_EOT flag. + If the next transfer to be queued on the channel does not have the + DMA_PREP_LOAD_EOT flag set, the current transfer will be repeated until the + client terminates all transfers. + + - This flag is only supported if the channel reports the DMA_REPEAT + capability. + +- DMA_PREP_LOAD_EOT + + - If set, the transfer will replace the transfer currently being executed at + the end of the transfer. + + - This is the default behaviour for non-repeated transfers, specifying + DMA_PREP_LOAD_EOT for non-repeated transfers will thus make no difference. + + - When using repeated transfers, DMA clients will usually need to set the + DMA_PREP_LOAD_EOT flag on all transfers, otherwise the channel will keep + repeating the last repeated transfer and ignore the new transfers being + queued. Failure to set DMA_PREP_LOAD_EOT will appear as if the channel was + stuck on the previous transfer. + + - This flag is only supported if the channel reports the DMA_LOAD_EOT + capability. + +General Design Notes +==================== + +Most of the DMAEngine drivers you'll see are based on a similar design +that handles the end of transfer interrupts in the handler, but defer +most work to a tasklet, including the start of a new transfer whenever +the previous transfer ended. + +This is a rather inefficient design though, because the inter-transfer +latency will be not only the interrupt latency, but also the +scheduling latency of the tasklet, which will leave the channel idle +in between, which will slow down the global transfer rate. + +You should avoid this kind of practice, and instead of electing a new +transfer in your tasklet, move that part to the interrupt handler in +order to have a shorter idle window (that we can't really avoid +anyway). + +Glossary +======== + +- Burst: A number of consecutive read or write operations that + can be queued to buffers before being flushed to memory. + +- Chunk: A contiguous collection of bursts + +- Transfer: A collection of chunks (be it contiguous or not) diff --git a/Documentation/driver-api/dmaengine/pxa_dma.rst b/Documentation/driver-api/dmaengine/pxa_dma.rst new file mode 100644 index 000000000..442ee691a --- /dev/null +++ b/Documentation/driver-api/dmaengine/pxa_dma.rst @@ -0,0 +1,190 @@ +============================== +PXA/MMP - DMA Slave controller +============================== + +Constraints +=========== + +a) Transfers hot queuing +A driver submitting a transfer and issuing it should be granted the transfer +is queued even on a running DMA channel. +This implies that the queuing doesn't wait for the previous transfer end, +and that the descriptor chaining is not only done in the irq/tasklet code +triggered by the end of the transfer. +A transfer which is submitted and issued on a phy doesn't wait for a phy to +stop and restart, but is submitted on a "running channel". The other +drivers, especially mmp_pdma waited for the phy to stop before relaunching +a new transfer. + +b) All transfers having asked for confirmation should be signaled +Any issued transfer with DMA_PREP_INTERRUPT should trigger a callback call. +This implies that even if an irq/tasklet is triggered by end of tx1, but +at the time of irq/dma tx2 is already finished, tx1->complete() and +tx2->complete() should be called. + +c) Channel running state +A driver should be able to query if a channel is running or not. For the +multimedia case, such as video capture, if a transfer is submitted and then +a check of the DMA channel reports a "stopped channel", the transfer should +not be issued until the next "start of frame interrupt", hence the need to +know if a channel is in running or stopped state. + +d) Bandwidth guarantee +The PXA architecture has 4 levels of DMAs priorities : high, normal, low. +The high priorities get twice as much bandwidth as the normal, which get twice +as much as the low priorities. +A driver should be able to request a priority, especially the real-time +ones such as pxa_camera with (big) throughputs. + +Design +====== +a) Virtual channels +Same concept as in sa11x0 driver, ie. a driver was assigned a "virtual +channel" linked to the requestor line, and the physical DMA channel is +assigned on the fly when the transfer is issued. + +b) Transfer anatomy for a scatter-gather transfer + +:: + + +------------+-----+---------------+----------------+-----------------+ + | desc-sg[0] | ... | desc-sg[last] | status updater | finisher/linker | + +------------+-----+---------------+----------------+-----------------+ + +This structure is pointed by dma->sg_cpu. +The descriptors are used as follows : + + - desc-sg[i]: i-th descriptor, transferring the i-th sg + element to the video buffer scatter gather + + - status updater + Transfers a single u32 to a well known dma coherent memory to leave + a trace that this transfer is done. The "well known" is unique per + physical channel, meaning that a read of this value will tell which + is the last finished transfer at that point in time. + + - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN + + - linker: has ddadr= desc-sg[0] of next transfer, dcmd=0 + +c) Transfers hot-chaining +Suppose the running chain is: + +:: + + Buffer 1 Buffer 2 + +---------+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+---+ + | | + +----+ + +After a call to dmaengine_submit(b3), the chain will look like: + +:: + + Buffer 1 Buffer 2 Buffer 3 + +---------+----+---+ +----+----+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ + | | | | + +----+ +----+ + new_link + +If while new_link was created the DMA channel stopped, it is _not_ +restarted. Hot-chaining doesn't break the assumption that +dma_async_issue_pending() is to be used to ensure the transfer is actually started. + +One exception to this rule : + +- if Buffer1 and Buffer2 had all their addresses 8 bytes aligned + +- and if Buffer3 has at least one address not 4 bytes aligned + +- then hot-chaining cannot happen, as the channel must be stopped, the + "align bit" must be set, and the channel restarted As a consequence, + such a transfer tx_submit() will be queued on the submitted queue, and + this specific case if the DMA is already running in aligned mode. + +d) Transfers completion updater +Each time a transfer is completed on a channel, an interrupt might be +generated or not, up to the client's request. But in each case, the last +descriptor of a transfer, the "status updater", will write the latest +transfer being completed into the physical channel's completion mark. + +This will speed up residue calculation, for large transfers such as video +buffers which hold around 6k descriptors or more. This also allows without +any lock to find out what is the latest completed transfer in a running +DMA chain. + +e) Transfers completion, irq and tasklet +When a transfer flagged as "DMA_PREP_INTERRUPT" is finished, the dma irq +is raised. Upon this interrupt, a tasklet is scheduled for the physical +channel. + +The tasklet is responsible for : + +- reading the physical channel last updater mark + +- calling all the transfer callbacks of finished transfers, based on + that mark, and each transfer flags. + +If a transfer is completed while this handling is done, a dma irq will +be raised, and the tasklet will be scheduled once again, having a new +updater mark. + +f) Residue +Residue granularity will be descriptor based. The issued but not completed +transfers will be scanned for all of their descriptors against the +currently running descriptor. + +g) Most complicated case of driver's tx queues +The most tricky situation is when : + + - there are not "acked" transfers (tx0) + + - a driver submitted an aligned tx1, not chained + + - a driver submitted an aligned tx2 => tx2 is cold chained to tx1 + + - a driver issued tx1+tx2 => channel is running in aligned mode + + - a driver submitted an aligned tx3 => tx3 is hot-chained + + - a driver submitted an unaligned tx4 => tx4 is put in submitted queue, + not chained + + - a driver issued tx4 => tx4 is put in issued queue, not chained + + - a driver submitted an aligned tx5 => tx5 is put in submitted queue, not + chained + + - a driver submitted an aligned tx6 => tx6 is put in submitted queue, + cold chained to tx5 + + This translates into (after tx4 is issued) : + + - issued queue + + :: + + +-----+ +-----+ +-----+ +-----+ + | tx1 | | tx2 | | tx3 | | tx4 | + +---|-+ ^---|-+ ^-----+ +-----+ + | | | | + +---+ +---+ + - submitted queue + +-----+ +-----+ + | tx5 | | tx6 | + +---|-+ ^-----+ + | | + +---+ + +- completed queue : empty + +- allocated queue : tx0 + +It should be noted that after tx3 is completed, the channel is stopped, and +restarted in "unaligned mode" to handle tx4. + +Author: Robert Jarzmik diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst new file mode 100644 index 000000000..7ea1d7a41 --- /dev/null +++ b/Documentation/driver-api/driver-model/binding.rst @@ -0,0 +1,98 @@ +============== +Driver Binding +============== + +Driver binding is the process of associating a device with a device +driver that can control it. Bus drivers have typically handled this +because there have been bus-specific structures to represent the +devices and the drivers. With generic device and device driver +structures, most of the binding can take place using common code. + + +Bus +~~~ + +The bus type structure contains a list of all devices that are on that bus +type in the system. When device_register is called for a device, it is +inserted into the end of this list. The bus object also contains a +list of all drivers of that bus type. When driver_register is called +for a driver, it is inserted at the end of this list. These are the +two events which trigger driver binding. + + +device_register +~~~~~~~~~~~~~~~ + +When a new device is added, the bus's list of drivers is iterated over +to find one that supports it. In order to determine that, the device +ID of the device must match one of the device IDs that the driver +supports. The format and semantics for comparing IDs is bus-specific. +Instead of trying to derive a complex state machine and matching +algorithm, it is up to the bus driver to provide a callback to compare +a device against the IDs of a driver. The bus returns 1 if a match was +found; 0 otherwise. + +int match(struct device * dev, struct device_driver * drv); + +If a match is found, the device's driver field is set to the driver +and the driver's probe callback is called. This gives the driver a +chance to verify that it really does support the hardware, and that +it's in a working state. + +Device Class +~~~~~~~~~~~~ + +Upon the successful completion of probe, the device is registered with +the class to which it belongs. Device drivers belong to one and only one +class, and that is set in the driver's devclass field. +devclass_add_device is called to enumerate the device within the class +and actually register it with the class, which happens with the +class's register_dev callback. + + +Driver +~~~~~~ + +When a driver is attached to a device, the device is inserted into the +driver's list of devices. + + +sysfs +~~~~~ + +A symlink is created in the bus's 'devices' directory that points to +the device's directory in the physical hierarchy. + +A symlink is created in the driver's 'devices' directory that points +to the device's directory in the physical hierarchy. + +A directory for the device is created in the class's directory. A +symlink is created in that directory that points to the device's +physical location in the sysfs tree. + +A symlink can be created (though this isn't done yet) in the device's +physical directory to either its class directory, or the class's +top-level directory. One can also be created to point to its driver's +directory also. + + +driver_register +~~~~~~~~~~~~~~~ + +The process is almost identical for when a new driver is added. +The bus's list of devices is iterated over to find a match. Devices +that already have a driver are skipped. All the devices are iterated +over, to bind as many devices as possible to the driver. + + +Removal +~~~~~~~ + +When a device is removed, the reference count for it will eventually +go to 0. When it does, the remove callback of the driver is called. It +is removed from the driver's list of devices and the reference count +of the driver is decremented. All symlinks between the two are removed. + +When a driver is removed, the list of devices that it supports is +iterated over, and the driver's remove callback is called for each +one. The device is removed from that list and the symlinks removed. diff --git a/Documentation/driver-api/driver-model/bus.rst b/Documentation/driver-api/driver-model/bus.rst new file mode 100644 index 000000000..9709ab62a --- /dev/null +++ b/Documentation/driver-api/driver-model/bus.rst @@ -0,0 +1,146 @@ +========= +Bus Types +========= + +Definition +~~~~~~~~~~ +See the kerneldoc for the struct bus_type. + +int bus_register(struct bus_type * bus); + + +Declaration +~~~~~~~~~~~ + +Each bus type in the kernel (PCI, USB, etc) should declare one static +object of this type. They must initialize the name field, and may +optionally initialize the match callback:: + + struct bus_type pci_bus_type = { + .name = "pci", + .match = pci_bus_match, + }; + +The structure should be exported to drivers in a header file: + +extern struct bus_type pci_bus_type; + + +Registration +~~~~~~~~~~~~ + +When a bus driver is initialized, it calls bus_register. This +initializes the rest of the fields in the bus object and inserts it +into a global list of bus types. Once the bus object is registered, +the fields in it are usable by the bus driver. + + +Callbacks +~~~~~~~~~ + +match(): Attaching Drivers to Devices +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The format of device ID structures and the semantics for comparing +them are inherently bus-specific. Drivers typically declare an array +of device IDs of devices they support that reside in a bus-specific +driver structure. + +The purpose of the match callback is to give the bus an opportunity to +determine if a particular driver supports a particular device by +comparing the device IDs the driver supports with the device ID of a +particular device, without sacrificing bus-specific functionality or +type-safety. + +When a driver is registered with the bus, the bus's list of devices is +iterated over, and the match callback is called for each device that +does not have a driver associated with it. + + + +Device and Driver Lists +~~~~~~~~~~~~~~~~~~~~~~~ + +The lists of devices and drivers are intended to replace the local +lists that many buses keep. They are lists of struct devices and +struct device_drivers, respectively. Bus drivers are free to use the +lists as they please, but conversion to the bus-specific type may be +necessary. + +The LDM core provides helper functions for iterating over each list:: + + int bus_for_each_dev(struct bus_type * bus, struct device * start, + void * data, + int (*fn)(struct device *, void *)); + + int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, + void * data, int (*fn)(struct device_driver *, void *)); + +These helpers iterate over the respective list, and call the callback +for each device or driver in the list. All list accesses are +synchronized by taking the bus's lock (read currently). The reference +count on each object in the list is incremented before the callback is +called; it is decremented after the next object has been obtained. The +lock is not held when calling the callback. + + +sysfs +~~~~~~~~ +There is a top-level directory named 'bus'. + +Each bus gets a directory in the bus directory, along with two default +directories:: + + /sys/bus/pci/ + |-- devices + `-- drivers + +Drivers registered with the bus get a directory in the bus's drivers +directory:: + + /sys/bus/pci/ + |-- devices + `-- drivers + |-- Intel ICH + |-- Intel ICH Joystick + |-- agpgart + `-- e100 + +Each device that is discovered on a bus of that type gets a symlink in +the bus's devices directory to the device's directory in the physical +hierarchy:: + + /sys/bus/pci/ + |-- devices + | |-- 00:00.0 -> ../../../root/pci0/00:00.0 + | |-- 00:01.0 -> ../../../root/pci0/00:01.0 + | `-- 00:02.0 -> ../../../root/pci0/00:02.0 + `-- drivers + + +Exporting Attributes +~~~~~~~~~~~~~~~~~~~~ + +:: + + struct bus_attribute { + struct attribute attr; + ssize_t (*show)(const struct bus_type *, char * buf); + ssize_t (*store)(const struct bus_type *, const char * buf, size_t count); + }; + +Bus drivers can export attributes using the BUS_ATTR_RW macro that works +similarly to the DEVICE_ATTR_RW macro for devices. For example, a +definition like this:: + + static BUS_ATTR_RW(debug); + +is equivalent to declaring:: + + static bus_attribute bus_attr_debug; + +This can then be used to add and remove the attribute from the bus's +sysfs directory using:: + + int bus_create_file(struct bus_type *, struct bus_attribute *); + void bus_remove_file(struct bus_type *, struct bus_attribute *); diff --git a/Documentation/driver-api/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst new file mode 100644 index 000000000..41eb8f41f --- /dev/null +++ b/Documentation/driver-api/driver-model/design-patterns.rst @@ -0,0 +1,116 @@ +============================= +Device Driver Design Patterns +============================= + +This document describes a few common design patterns found in device drivers. +It is likely that subsystem maintainers will ask driver developers to +conform to these design patterns. + +1. State Container +2. container_of() + + +1. State Container +~~~~~~~~~~~~~~~~~~ + +While the kernel contains a few device drivers that assume that they will +only be probed() once on a certain system (singletons), it is custom to assume +that the device the driver binds to will appear in several instances. This +means that the probe() function and all callbacks need to be reentrant. + +The most common way to achieve this is to use the state container design +pattern. It usually has this form:: + + struct foo { + spinlock_t lock; /* Example member */ + (...) + }; + + static int foo_probe(...) + { + struct foo *foo; + + foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL); + if (!foo) + return -ENOMEM; + spin_lock_init(&foo->lock); + (...) + } + +This will create an instance of struct foo in memory every time probe() is +called. This is our state container for this instance of the device driver. +Of course it is then necessary to always pass this instance of the +state around to all functions that need access to the state and its members. + +For example, if the driver is registering an interrupt handler, you would +pass around a pointer to struct foo like this:: + + static irqreturn_t foo_handler(int irq, void *arg) + { + struct foo *foo = arg; + (...) + } + + static int foo_probe(...) + { + struct foo *foo; + + (...) + ret = request_irq(irq, foo_handler, 0, "foo", foo); + } + +This way you always get a pointer back to the correct instance of foo in +your interrupt handler. + + +2. container_of() +~~~~~~~~~~~~~~~~~ + +Continuing on the above example we add an offloaded work:: + + struct foo { + spinlock_t lock; + struct workqueue_struct *wq; + struct work_struct offload; + (...) + }; + + static void foo_work(struct work_struct *work) + { + struct foo *foo = container_of(work, struct foo, offload); + + (...) + } + + static irqreturn_t foo_handler(int irq, void *arg) + { + struct foo *foo = arg; + + queue_work(foo->wq, &foo->offload); + (...) + } + + static int foo_probe(...) + { + struct foo *foo; + + foo->wq = create_singlethread_workqueue("foo-wq"); + INIT_WORK(&foo->offload, foo_work); + (...) + } + +The design pattern is the same for an hrtimer or something similar that will +return a single argument which is a pointer to a struct member in the +callback. + +container_of() is a macro defined in + +What container_of() does is to obtain a pointer to the containing struct from +a pointer to a member by a simple subtraction using the offsetof() macro from +standard C, which allows something similar to object oriented behaviours. +Notice that the contained member must not be a pointer, but an actual member +for this to work. + +We can see here that we avoid having global pointers to our struct foo * +instance this way, while still keeping the number of parameters passed to the +work function to a single pointer. diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst new file mode 100644 index 000000000..0833be568 --- /dev/null +++ b/Documentation/driver-api/driver-model/device.rst @@ -0,0 +1,120 @@ +========================== +The Basic Device Structure +========================== + +See the kerneldoc for the struct device. + + +Programming Interface +~~~~~~~~~~~~~~~~~~~~~ +The bus driver that discovers the device uses this to register the +device with the core:: + + int device_register(struct device * dev); + +The bus should initialize the following fields: + + - parent + - name + - bus_id + - bus + +A device is removed from the core when its reference count goes to +0. The reference count can be adjusted using:: + + struct device * get_device(struct device * dev); + void put_device(struct device * dev); + +get_device() will return a pointer to the struct device passed to it +if the reference is not already 0 (if it's in the process of being +removed already). + +A driver can access the lock in the device structure using:: + + void lock_device(struct device * dev); + void unlock_device(struct device * dev); + + +Attributes +~~~~~~~~~~ + +:: + + struct device_attribute { + struct attribute attr; + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + }; + +Attributes of devices can be exported by a device driver through sysfs. + +Please see Documentation/filesystems/sysfs.rst for more information +on how sysfs works. + +As explained in Documentation/core-api/kobject.rst, device attributes must be +created before the KOBJ_ADD uevent is generated. The only way to realize +that is by defining an attribute group. + +Attributes are declared using a macro called DEVICE_ATTR:: + + #define DEVICE_ATTR(name,mode,show,store) + +Example::: + + static DEVICE_ATTR(type, 0444, type_show, NULL); + static DEVICE_ATTR(power, 0644, power_show, power_store); + +Helper macros are available for common values of mode, so the above examples +can be simplified to::: + + static DEVICE_ATTR_RO(type); + static DEVICE_ATTR_RW(power); + +This declares two structures of type struct device_attribute with respective +names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be +organized as follows into a group:: + + static struct attribute *dev_attrs[] = { + &dev_attr_type.attr, + &dev_attr_power.attr, + NULL, + }; + + static struct attribute_group dev_group = { + .attrs = dev_attrs, + }; + + static const struct attribute_group *dev_groups[] = { + &dev_group, + NULL, + }; + +A helper macro is available for the common case of a single group, so the +above two structures can be declared using::: + + ATTRIBUTE_GROUPS(dev); + +This array of groups can then be associated with a device by setting the +group pointer in struct device before device_register() is invoked:: + + dev->groups = dev_groups; + device_register(dev); + +The device_register() function will use the 'groups' pointer to create the +device attributes and the device_unregister() function will use this pointer +to remove the device attributes. + +Word of warning: While the kernel allows device_create_file() and +device_remove_file() to be called on a device at any time, userspace has +strict expectations on when attributes get created. When a new device is +registered in the kernel, a uevent is generated to notify userspace (like +udev) that a new device is available. If attributes are added after the +device is registered, then userspace won't get notified and userspace will +not know about the new attributes. + +This is important for device driver that need to publish additional +attributes for a device at driver probe time. If the device driver simply +calls device_create_file() on the device structure passed to it, then +userspace will never be notified of the new attributes. diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst new file mode 100644 index 000000000..8be086b3f --- /dev/null +++ b/Documentation/driver-api/driver-model/devres.rst @@ -0,0 +1,462 @@ +================================ +Devres - Managed Device Resource +================================ + +Tejun Heo + +First draft 10 January 2007 + +.. contents + + 1. Intro : Huh? Devres? + 2. Devres : Devres in a nutshell + 3. Devres Group : Group devres'es and release them together + 4. Details : Life time rules, calling context, ... + 5. Overhead : How much do we have to pay for this? + 6. List of managed interfaces: Currently implemented managed interfaces + + +1. Intro +-------- + +devres came up while trying to convert libata to use iomap. Each +iomapped address should be kept and unmapped on driver detach. For +example, a plain SFF ATA controller (that is, good old PCI IDE) in +native mode makes use of 5 PCI BARs and all of them should be +maintained. + +As with many other device drivers, libata low level drivers have +sufficient bugs in ->remove and ->probe failure path. Well, yes, +that's probably because libata low level driver developers are lazy +bunch, but aren't all low level driver developers? After spending a +day fiddling with braindamaged hardware with no document or +braindamaged document, if it's finally working, well, it's working. + +For one reason or another, low level drivers don't receive as much +attention or testing as core code, and bugs on driver detach or +initialization failure don't happen often enough to be noticeable. +Init failure path is worse because it's much less travelled while +needs to handle multiple entry points. + +So, many low level drivers end up leaking resources on driver detach +and having half broken failure path implementation in ->probe() which +would leak resources or even cause oops when failure occurs. iomap +adds more to this mix. So do msi and msix. + + +2. Devres +--------- + +devres is basically linked list of arbitrarily sized memory areas +associated with a struct device. Each devres entry is associated with +a release function. A devres can be released in several ways. No +matter what, all devres entries are released on driver detach. On +release, the associated release function is invoked and then the +devres entry is freed. + +Managed interface is created for resources commonly used by device +drivers using devres. For example, coherent DMA memory is acquired +using dma_alloc_coherent(). The managed version is called +dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except +for the DMA memory allocated using it is managed and will be +automatically released on driver detach. Implementation looks like +the following:: + + struct dma_devres { + size_t size; + void *vaddr; + dma_addr_t dma_handle; + }; + + static void dmam_coherent_release(struct device *dev, void *res) + { + struct dma_devres *this = res; + + dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); + } + + dmam_alloc_coherent(dev, size, dma_handle, gfp) + { + struct dma_devres *dr; + void *vaddr; + + dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); + ... + + /* alloc DMA memory as usual */ + vaddr = dma_alloc_coherent(...); + ... + + /* record size, vaddr, dma_handle in dr */ + dr->vaddr = vaddr; + ... + + devres_add(dev, dr); + + return vaddr; + } + +If a driver uses dmam_alloc_coherent(), the area is guaranteed to be +freed whether initialization fails half-way or the device gets +detached. If most resources are acquired using managed interface, a +driver can have much simpler init and exit code. Init path basically +looks like the following:: + + my_init_one() + { + struct mydev *d; + + d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->ring = dmam_alloc_coherent(...); + if (!d->ring) + return -ENOMEM; + + if (check something) + return -EINVAL; + ... + + return register_to_upper_layer(d); + } + +And exit path:: + + my_remove_one() + { + unregister_from_upper_layer(d); + shutdown_my_hardware(); + } + +As shown above, low level drivers can be simplified a lot by using +devres. Complexity is shifted from less maintained low level drivers +to better maintained higher layer. Also, as init failure path is +shared with exit path, both can get more testing. + +Note though that when converting current calls or assignments to +managed devm_* versions it is up to you to check if internal operations +like allocating memory, have failed. Managed resources pertains to the +freeing of these resources *only* - all other checks needed are still +on you. In some cases this may mean introducing checks that were not +necessary before moving to the managed devm_* calls. + + +3. Devres group +--------------- + +Devres entries can be grouped using devres group. When a group is +released, all contained normal devres entries and properly nested +groups are released. One usage is to rollback series of acquired +resources on failure. For example:: + + if (!devres_open_group(dev, NULL, GFP_KERNEL)) + return -ENOMEM; + + acquire A; + if (failed) + goto err; + + acquire B; + if (failed) + goto err; + ... + + devres_remove_group(dev, NULL); + return 0; + + err: + devres_release_group(dev, NULL); + return err_code; + +As resource acquisition failure usually means probe failure, constructs +like above are usually useful in midlayer driver (e.g. libata core +layer) where interface function shouldn't have side effect on failure. +For LLDs, just returning error code suffices in most cases. + +Each group is identified by `void *id`. It can either be explicitly +specified by @id argument to devres_open_group() or automatically +created by passing NULL as @id as in the above example. In both +cases, devres_open_group() returns the group's id. The returned id +can be passed to other devres functions to select the target group. +If NULL is given to those functions, the latest open group is +selected. + +For example, you can do something like the following:: + + int my_midlayer_create_something() + { + if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) + return -ENOMEM; + + ... + + devres_close_group(dev, my_midlayer_create_something); + return 0; + } + + void my_midlayer_destroy_something() + { + devres_release_group(dev, my_midlayer_create_something); + } + + +4. Details +---------- + +Lifetime of a devres entry begins on devres allocation and finishes +when it is released or destroyed (removed and freed) - no reference +counting. + +devres core guarantees atomicity to all basic devres operations and +has support for single-instance devres types (atomic +lookup-and-add-if-not-found). Other than that, synchronizing +concurrent accesses to allocated devres data is caller's +responsibility. This is usually non-issue because bus ops and +resource allocations already do the job. + +For an example of single-instance devres type, read pcim_iomap_table() +in lib/devres.c. + +All devres interface functions can be called without context if the +right gfp mask is given. + + +5. Overhead +----------- + +Each devres bookkeeping info is allocated together with requested data +area. With debug option turned off, bookkeeping info occupies 16 +bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded +up to ull alignment). If singly linked list is used, it can be +reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). + +Each devres group occupies 8 pointers. It can be reduced to 6 if +singly linked list is used. + +Memory space overhead on ahci controller with two ports is between 300 +and 400 bytes on 32bit machine after naive conversion (we can +certainly invest a bit more effort into libata core layer). + + +6. List of managed interfaces +----------------------------- + +CLOCK + devm_clk_get() + devm_clk_get_optional() + devm_clk_put() + devm_clk_bulk_get() + devm_clk_bulk_get_all() + devm_clk_bulk_get_optional() + devm_get_clk_from_child() + devm_clk_hw_register() + devm_of_clk_add_hw_provider() + devm_clk_hw_register_clkdev() + +DMA + dmaenginem_async_device_register() + dmam_alloc_coherent() + dmam_alloc_attrs() + dmam_free_coherent() + dmam_pool_create() + dmam_pool_destroy() + +DRM + devm_drm_dev_alloc() + +GPIO + devm_gpiod_get() + devm_gpiod_get_array() + devm_gpiod_get_array_optional() + devm_gpiod_get_index() + devm_gpiod_get_index_optional() + devm_gpiod_get_optional() + devm_gpiod_put() + devm_gpiod_unhinge() + devm_gpiochip_add_data() + devm_gpio_request() + devm_gpio_request_one() + +I2C + devm_i2c_add_adapter() + devm_i2c_new_dummy_device() + +IIO + devm_iio_device_alloc() + devm_iio_device_register() + devm_iio_dmaengine_buffer_setup() + devm_iio_kfifo_buffer_setup() + devm_iio_kfifo_buffer_setup_ext() + devm_iio_map_array_register() + devm_iio_triggered_buffer_setup() + devm_iio_triggered_buffer_setup_ext() + devm_iio_trigger_alloc() + devm_iio_trigger_register() + devm_iio_channel_get() + devm_iio_channel_get_all() + devm_iio_hw_consumer_alloc() + devm_fwnode_iio_channel_get_by_name() + +INPUT + devm_input_allocate_device() + +IO region + devm_release_mem_region() + devm_release_region() + devm_release_resource() + devm_request_mem_region() + devm_request_free_mem_region() + devm_request_region() + devm_request_resource() + +IOMAP + devm_ioport_map() + devm_ioport_unmap() + devm_ioremap() + devm_ioremap_uc() + devm_ioremap_wc() + devm_ioremap_resource() : checks resource, requests memory region, ioremaps + devm_ioremap_resource_wc() + devm_platform_ioremap_resource() : calls devm_ioremap_resource() for platform device + devm_platform_ioremap_resource_byname() + devm_platform_get_and_ioremap_resource() + devm_iounmap() + pcim_iomap() + pcim_iomap_regions() : do request_region() and iomap() on multiple BARs + pcim_iomap_table() : array of mapped addresses indexed by BAR + pcim_iounmap() + +IRQ + devm_free_irq() + devm_request_any_context_irq() + devm_request_irq() + devm_request_threaded_irq() + devm_irq_alloc_descs() + devm_irq_alloc_desc() + devm_irq_alloc_desc_at() + devm_irq_alloc_desc_from() + devm_irq_alloc_descs_from() + devm_irq_alloc_generic_chip() + devm_irq_setup_generic_chip() + devm_irq_domain_create_sim() + +LED + devm_led_classdev_register() + devm_led_classdev_register_ext() + devm_led_classdev_unregister() + devm_led_trigger_register() + devm_of_led_get() + +MDIO + devm_mdiobus_alloc() + devm_mdiobus_alloc_size() + devm_mdiobus_register() + devm_of_mdiobus_register() + +MEM + devm_free_pages() + devm_get_free_pages() + devm_kasprintf() + devm_kcalloc() + devm_kfree() + devm_kmalloc() + devm_kmalloc_array() + devm_kmemdup() + devm_krealloc() + devm_krealloc_array() + devm_kstrdup() + devm_kstrdup_const() + devm_kvasprintf() + devm_kzalloc() + +MFD + devm_mfd_add_devices() + +MUX + devm_mux_chip_alloc() + devm_mux_chip_register() + devm_mux_control_get() + devm_mux_state_get() + +NET + devm_alloc_etherdev() + devm_alloc_etherdev_mqs() + devm_register_netdev() + +PER-CPU MEM + devm_alloc_percpu() + devm_free_percpu() + +PCI + devm_pci_alloc_host_bridge() : managed PCI host bridge allocation + devm_pci_remap_cfgspace() : ioremap PCI configuration space + devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource + pcim_enable_device() : after success, all PCI ops become managed + pcim_pin_device() : keep PCI device enabled after release + +PHY + devm_usb_get_phy() + devm_usb_get_phy_by_node() + devm_usb_get_phy_by_phandle() + devm_usb_put_phy() + +PINCTRL + devm_pinctrl_get() + devm_pinctrl_put() + devm_pinctrl_get_select() + devm_pinctrl_register() + devm_pinctrl_register_and_init() + devm_pinctrl_unregister() + +POWER + devm_reboot_mode_register() + devm_reboot_mode_unregister() + +PWM + devm_pwmchip_add() + devm_pwm_get() + devm_fwnode_pwm_get() + +REGULATOR + devm_regulator_bulk_register_supply_alias() + devm_regulator_bulk_get() + devm_regulator_bulk_get_const() + devm_regulator_bulk_get_enable() + devm_regulator_bulk_put() + devm_regulator_get() + devm_regulator_get_enable() + devm_regulator_get_enable_optional() + devm_regulator_get_exclusive() + devm_regulator_get_optional() + devm_regulator_irq_helper() + devm_regulator_put() + devm_regulator_register() + devm_regulator_register_notifier() + devm_regulator_register_supply_alias() + devm_regulator_unregister_notifier() + +RESET + devm_reset_control_get() + devm_reset_controller_register() + +RTC + devm_rtc_device_register() + devm_rtc_allocate_device() + devm_rtc_register_device() + devm_rtc_nvmem_register() + +SERDEV + devm_serdev_device_open() + +SLAVE DMA ENGINE + devm_acpi_dma_controller_register() + devm_acpi_dma_controller_free() + +SPI + devm_spi_alloc_master() + devm_spi_alloc_slave() + devm_spi_register_master() + +WATCHDOG + devm_watchdog_register_device() diff --git a/Documentation/driver-api/driver-model/driver.rst b/Documentation/driver-api/driver-model/driver.rst new file mode 100644 index 000000000..06f818b1d --- /dev/null +++ b/Documentation/driver-api/driver-model/driver.rst @@ -0,0 +1,286 @@ +============== +Device Drivers +============== + +See the kerneldoc for the struct device_driver. + +Allocation +~~~~~~~~~~ + +Device drivers are statically allocated structures. Though there may +be multiple devices in a system that a driver supports, struct +device_driver represents the driver as a whole (not a particular +device instance). + +Initialization +~~~~~~~~~~~~~~ + +The driver must initialize at least the name and bus fields. It should +also initialize the devclass field (when it arrives), so it may obtain +the proper linkage internally. It should also initialize as many of +the callbacks as possible, though each is optional. + +Declaration +~~~~~~~~~~~ + +As stated above, struct device_driver objects are statically +allocated. Below is an example declaration of the eepro100 +driver. This declaration is hypothetical only; it relies on the driver +being converted completely to the new model:: + + static struct device_driver eepro100_driver = { + .name = "eepro100", + .bus = &pci_bus_type, + + .probe = eepro100_probe, + .remove = eepro100_remove, + .suspend = eepro100_suspend, + .resume = eepro100_resume, + }; + +Most drivers will not be able to be converted completely to the new +model because the bus they belong to has a bus-specific structure with +bus-specific fields that cannot be generalized. + +The most common example of this are device ID structures. A driver +typically defines an array of device IDs that it supports. The format +of these structures and the semantics for comparing device IDs are +completely bus-specific. Defining them as bus-specific entities would +sacrifice type-safety, so we keep bus-specific structures around. + +Bus-specific drivers should include a generic struct device_driver in +the definition of the bus-specific driver. Like this:: + + struct pci_driver { + const struct pci_device_id *id_table; + struct device_driver driver; + }; + +A definition that included bus-specific fields would look like +(using the eepro100 driver again):: + + static struct pci_driver eepro100_driver = { + .id_table = eepro100_pci_tbl, + .driver = { + .name = "eepro100", + .bus = &pci_bus_type, + .probe = eepro100_probe, + .remove = eepro100_remove, + .suspend = eepro100_suspend, + .resume = eepro100_resume, + }, + }; + +Some may find the syntax of embedded struct initialization awkward or +even a bit ugly. So far, it's the best way we've found to do what we want... + +Registration +~~~~~~~~~~~~ + +:: + + int driver_register(struct device_driver *drv); + +The driver registers the structure on startup. For drivers that have +no bus-specific fields (i.e. don't have a bus-specific driver +structure), they would use driver_register and pass a pointer to their +struct device_driver object. + +Most drivers, however, will have a bus-specific structure and will +need to register with the bus using something like pci_driver_register. + +It is important that drivers register their driver structure as early as +possible. Registration with the core initializes several fields in the +struct device_driver object, including the reference count and the +lock. These fields are assumed to be valid at all times and may be +used by the device model core or the bus driver. + + +Transition Bus Drivers +~~~~~~~~~~~~~~~~~~~~~~ + +By defining wrapper functions, the transition to the new model can be +made easier. Drivers can ignore the generic structure altogether and +let the bus wrapper fill in the fields. For the callbacks, the bus can +define generic callbacks that forward the call to the bus-specific +callbacks of the drivers. + +This solution is intended to be only temporary. In order to get class +information in the driver, the drivers must be modified anyway. Since +converting drivers to the new model should reduce some infrastructural +complexity and code size, it is recommended that they are converted as +class information is added. + +Access +~~~~~~ + +Once the object has been registered, it may access the common fields of +the object, like the lock and the list of devices:: + + int driver_for_each_dev(struct device_driver *drv, void *data, + int (*callback)(struct device *dev, void *data)); + +The devices field is a list of all the devices that have been bound to +the driver. The LDM core provides a helper function to operate on all +the devices a driver controls. This helper locks the driver on each +node access, and does proper reference counting on each device as it +accesses it. + + +sysfs +~~~~~ + +When a driver is registered, a sysfs directory is created in its +bus's directory. In this directory, the driver can export an interface +to userspace to control operation of the driver on a global basis; +e.g. toggling debugging output in the driver. + +A future feature of this directory will be a 'devices' directory. This +directory will contain symlinks to the directories of devices it +supports. + + + +Callbacks +~~~~~~~~~ + +:: + + int (*probe) (struct device *dev); + +The probe() entry is called in task context, with the bus's rwsem locked +and the driver partially bound to the device. Drivers commonly use +container_of() to convert "dev" to a bus-specific type, both in probe() +and other routines. That type often provides device resource data, such +as pci_dev.resource[] or platform_device.resources, which is used in +addition to dev->platform_data to initialize the driver. + +This callback holds the driver-specific logic to bind the driver to a +given device. That includes verifying that the device is present, that +it's a version the driver can handle, that driver data structures can +be allocated and initialized, and that any hardware can be initialized. +Drivers often store a pointer to their state with dev_set_drvdata(). +When the driver has successfully bound itself to that device, then probe() +returns zero and the driver model code will finish its part of binding +the driver to that device. + +A driver's probe() may return a negative errno value to indicate that +the driver did not bind to this device, in which case it should have +released all resources it allocated. + +Optionally, probe() may return -EPROBE_DEFER if the driver depends on +resources that are not yet available (e.g., supplied by a driver that +hasn't initialized yet). The driver core will put the device onto the +deferred probe list and will try to call it again later. If a driver +must defer, it should return -EPROBE_DEFER as early as possible to +reduce the amount of time spent on setup work that will need to be +unwound and reexecuted at a later time. + +.. warning:: + -EPROBE_DEFER must not be returned if probe() has already created + child devices, even if those child devices are removed again + in a cleanup path. If -EPROBE_DEFER is returned after a child + device has been registered, it may result in an infinite loop of + .probe() calls to the same driver. + +:: + + void (*sync_state) (struct device *dev); + +sync_state is called only once for a device. It's called when all the consumer +devices of the device have successfully probed. The list of consumers of the +device is obtained by looking at the device links connecting that device to its +consumer devices. + +The first attempt to call sync_state() is made during late_initcall_sync() to +give firmware and drivers time to link devices to each other. During the first +attempt at calling sync_state(), if all the consumers of the device at that +point in time have already probed successfully, sync_state() is called right +away. If there are no consumers of the device during the first attempt, that +too is considered as "all consumers of the device have probed" and sync_state() +is called right away. + +If during the first attempt at calling sync_state() for a device, there are +still consumers that haven't probed successfully, the sync_state() call is +postponed and reattempted in the future only when one or more consumers of the +device probe successfully. If during the reattempt, the driver core finds that +there are one or more consumers of the device that haven't probed yet, then +sync_state() call is postponed again. + +A typical use case for sync_state() is to have the kernel cleanly take over +management of devices from the bootloader. For example, if a device is left on +and at a particular hardware configuration by the bootloader, the device's +driver might need to keep the device in the boot configuration until all the +consumers of the device have probed. Once all the consumers of the device have +probed, the device's driver can synchronize the hardware state of the device to +match the aggregated software state requested by all the consumers. Hence the +name sync_state(). + +While obvious examples of resources that can benefit from sync_state() include +resources such as regulator, sync_state() can also be useful for complex +resources like IOMMUs. For example, IOMMUs with multiple consumers (devices +whose addresses are remapped by the IOMMU) might need to keep their mappings +fixed at (or additive to) the boot configuration until all its consumers have +probed. + +While the typical use case for sync_state() is to have the kernel cleanly take +over management of devices from the bootloader, the usage of sync_state() is +not restricted to that. Use it whenever it makes sense to take an action after +all the consumers of a device have probed:: + + int (*remove) (struct device *dev); + +remove is called to unbind a driver from a device. This may be +called if a device is physically removed from the system, if the +driver module is being unloaded, during a reboot sequence, or +in other cases. + +It is up to the driver to determine if the device is present or +not. It should free any resources allocated specifically for the +device; i.e. anything in the device's driver_data field. + +If the device is still present, it should quiesce the device and place +it into a supported low-power state. + +:: + + int (*suspend) (struct device *dev, pm_message_t state); + +suspend is called to put the device in a low power state. + +:: + + int (*resume) (struct device *dev); + +Resume is used to bring a device back from a low power state. + + +Attributes +~~~~~~~~~~ + +:: + + struct driver_attribute { + struct attribute attr; + ssize_t (*show)(struct device_driver *driver, char *buf); + ssize_t (*store)(struct device_driver *, const char *buf, size_t count); + }; + +Device drivers can export attributes via their sysfs directories. +Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO +macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO +macros. + +Example:: + + DRIVER_ATTR_RW(debug); + +This is equivalent to declaring:: + + struct driver_attribute driver_attr_debug; + +This can then be used to add and remove the attribute from the +driver's directory using:: + + int driver_create_file(struct device_driver *, const struct driver_attribute *); + void driver_remove_file(struct device_driver *, const struct driver_attribute *); diff --git a/Documentation/driver-api/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst new file mode 100644 index 000000000..4831bdd92 --- /dev/null +++ b/Documentation/driver-api/driver-model/index.rst @@ -0,0 +1,23 @@ +============ +Driver Model +============ + +.. toctree:: + :maxdepth: 1 + + binding + bus + design-patterns + device + devres + driver + overview + platform + porting + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst new file mode 100644 index 000000000..e98d0ab4a --- /dev/null +++ b/Documentation/driver-api/driver-model/overview.rst @@ -0,0 +1,124 @@ +============================= +The Linux Kernel Device Model +============================= + +Patrick Mochel + +Drafted 26 August 2002 +Updated 31 January 2006 + + +Overview +~~~~~~~~ + +The Linux Kernel Driver Model is a unification of all the disparate driver +models that were previously used in the kernel. It is intended to augment the +bus-specific drivers for bridges and devices by consolidating a set of data +and operations into globally accessible data structures. + +Traditional driver models implemented some sort of tree-like structure +(sometimes just a list) for the devices they control. There wasn't any +uniformity across the different bus types. + +The current driver model provides a common, uniform data model for describing +a bus and the devices that can appear under the bus. The unified bus +model includes a set of common attributes which all busses carry, and a set +of common callbacks, such as device discovery during bus probing, bus +shutdown, bus power management, etc. + +The common device and bridge interface reflects the goals of the modern +computer: namely the ability to do seamless device "plug and play", power +management, and hot plug. In particular, the model dictated by Intel and +Microsoft (namely ACPI) ensures that almost every device on almost any bus +on an x86-compatible system can work within this paradigm. Of course, +not every bus is able to support all such operations, although most +buses support most of those operations. + + +Downstream Access +~~~~~~~~~~~~~~~~~ + +Common data fields have been moved out of individual bus layers into a common +data structure. These fields must still be accessed by the bus layers, +and sometimes by the device-specific drivers. + +Other bus layers are encouraged to do what has been done for the PCI layer. +struct pci_dev now looks like this:: + + struct pci_dev { + ... + + struct device dev; /* Generic device interface */ + ... + }; + +Note first that the struct device dev within the struct pci_dev is +statically allocated. This means only one allocation on device discovery. + +Note also that that struct device dev is not necessarily defined at the +front of the pci_dev structure. This is to make people think about what +they're doing when switching between the bus driver and the global driver, +and to discourage meaningless and incorrect casts between the two. + +The PCI bus layer freely accesses the fields of struct device. It knows about +the structure of struct pci_dev, and it should know the structure of struct +device. Individual PCI device drivers that have been converted to the current +driver model generally do not and should not touch the fields of struct device, +unless there is a compelling reason to do so. + +The above abstraction prevents unnecessary pain during transitional phases. +If it were not done this way, then when a field was renamed or removed, every +downstream driver would break. On the other hand, if only the bus layer +(and not the device layer) accesses the struct device, it is only the bus +layer that needs to change. + + +User Interface +~~~~~~~~~~~~~~ + +By virtue of having a complete hierarchical view of all the devices in the +system, exporting a complete hierarchical view to userspace becomes relatively +easy. This has been accomplished by implementing a special purpose virtual +file system named sysfs. + +Almost all mainstream Linux distros mount this filesystem automatically; you +can see some variation of the following in the output of the "mount" command:: + + $ mount + ... + none on /sys type sysfs (rw,noexec,nosuid,nodev) + ... + $ + +The auto-mounting of sysfs is typically accomplished by an entry similar to +the following in the /etc/fstab file:: + + none /sys sysfs defaults 0 0 + +or something similar in the /lib/init/fstab file on Debian-based systems:: + + none /sys sysfs nodev,noexec,nosuid 0 0 + +If sysfs is not automatically mounted, you can always do it manually with:: + + # mount -t sysfs sysfs /sys + +Whenever a device is inserted into the tree, a directory is created for it. +This directory may be populated at each layer of discovery - the global layer, +the bus layer, or the device layer. + +The global layer currently creates two files - 'name' and 'power'. The +former only reports the name of the device. The latter reports the +current power state of the device. It will also be used to set the current +power state. + +The bus layer may also create files for the devices it finds while probing the +bus. For example, the PCI layer currently creates 'irq' and 'resource' files +for each PCI device. + +A device-specific driver may also export files in its directory to expose +device-specific data or tunable interfaces. + +More information about the sysfs directory layout can be found in +the other documents in this directory and in the file +Documentation/filesystems/sysfs.rst. diff --git a/Documentation/driver-api/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst new file mode 100644 index 000000000..1fe5c6c61 --- /dev/null +++ b/Documentation/driver-api/driver-model/platform.rst @@ -0,0 +1,246 @@ +============================ +Platform Devices and Drivers +============================ + +See for the driver model interface to the +platform bus: platform_device, and platform_driver. This pseudo-bus +is used to connect devices on busses with minimal infrastructure, +like those used to integrate peripherals on many system-on-chip +processors, or some "legacy" PC interconnects; as opposed to large +formally specified ones like PCI or USB. + + +Platform devices +~~~~~~~~~~~~~~~~ +Platform devices are devices that typically appear as autonomous +entities in the system. This includes legacy port-based devices and +host bridges to peripheral buses, and most controllers integrated +into system-on-chip platforms. What they usually have in common +is direct addressing from a CPU bus. Rarely, a platform_device will +be connected through a segment of some other kind of bus; but its +registers will still be directly addressable. + +Platform devices are given a name, used in driver binding, and a +list of resources such as addresses and IRQs:: + + struct platform_device { + const char *name; + u32 id; + struct device dev; + u32 num_resources; + struct resource *resource; + }; + + +Platform drivers +~~~~~~~~~~~~~~~~ +Platform drivers follow the standard driver model convention, where +discovery/enumeration is handled outside the drivers, and drivers +provide probe() and remove() methods. They support power management +and shutdown notifications using the standard conventions:: + + struct platform_driver { + int (*probe)(struct platform_device *); + int (*remove)(struct platform_device *); + void (*shutdown)(struct platform_device *); + int (*suspend)(struct platform_device *, pm_message_t state); + int (*suspend_late)(struct platform_device *, pm_message_t state); + int (*resume_early)(struct platform_device *); + int (*resume)(struct platform_device *); + struct device_driver driver; + }; + +Note that probe() should in general verify that the specified device hardware +actually exists; sometimes platform setup code can't be sure. The probing +can use device resources, including clocks, and device platform_data. + +Platform drivers register themselves the normal way:: + + int platform_driver_register(struct platform_driver *drv); + +Or, in common situations where the device is known not to be hot-pluggable, +the probe() routine can live in an init section to reduce the driver's +runtime memory footprint:: + + int platform_driver_probe(struct platform_driver *drv, + int (*probe)(struct platform_device *)) + +Kernel modules can be composed of several platform drivers. The platform core +provides helpers to register and unregister an array of drivers:: + + int __platform_register_drivers(struct platform_driver * const *drivers, + unsigned int count, struct module *owner); + void platform_unregister_drivers(struct platform_driver * const *drivers, + unsigned int count); + +If one of the drivers fails to register, all drivers registered up to that +point will be unregistered in reverse order. Note that there is a convenience +macro that passes THIS_MODULE as owner parameter:: + + #define platform_register_drivers(drivers, count) + + +Device Enumeration +~~~~~~~~~~~~~~~~~~ +As a rule, platform specific (and often board-specific) setup code will +register platform devices:: + + int platform_device_register(struct platform_device *pdev); + + int platform_add_devices(struct platform_device **pdevs, int ndev); + +The general rule is to register only those devices that actually exist, +but in some cases extra devices might be registered. For example, a kernel +might be configured to work with an external network adapter that might not +be populated on all boards, or likewise to work with an integrated controller +that some boards might not hook up to any peripherals. + +In some cases, boot firmware will export tables describing the devices +that are populated on a given board. Without such tables, often the +only way for system setup code to set up the correct devices is to build +a kernel for a specific target board. Such board-specific kernels are +common with embedded and custom systems development. + +In many cases, the memory and IRQ resources associated with the platform +device are not enough to let the device's driver work. Board setup code +will often provide additional information using the device's platform_data +field to hold additional information. + +Embedded systems frequently need one or more clocks for platform devices, +which are normally kept off until they're actively needed (to save power). +System setup also associates those clocks with the device, so that +calls to clk_get(&pdev->dev, clock_name) return them as needed. + + +Legacy Drivers: Device Probing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some drivers are not fully converted to the driver model, because they take +on a non-driver role: the driver registers its platform device, rather than +leaving that for system infrastructure. Such drivers can't be hotplugged +or coldplugged, since those mechanisms require device creation to be in a +different system component than the driver. + +The only "good" reason for this is to handle older system designs which, like +original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware +configuration. Newer systems have largely abandoned that model, in favor of +bus-level support for dynamic configuration (PCI, USB), or device tables +provided by the boot firmware (e.g. PNPACPI on x86). There are too many +conflicting options about what might be where, and even educated guesses by +an operating system will be wrong often enough to make trouble. + +This style of driver is discouraged. If you're updating such a driver, +please try to move the device enumeration to a more appropriate location, +outside the driver. This will usually be cleanup, since such drivers +tend to already have "normal" modes, such as ones using device nodes that +were created by PNP or by platform device setup. + +None the less, there are some APIs to support such legacy drivers. Avoid +using these calls except with such hotplug-deficient drivers:: + + struct platform_device *platform_device_alloc( + const char *name, int id); + +You can use platform_device_alloc() to dynamically allocate a device, which +you will then initialize with resources and platform_device_register(). +A better solution is usually:: + + struct platform_device *platform_device_register_simple( + const char *name, int id, + struct resource *res, unsigned int nres); + +You can use platform_device_register_simple() as a one-step call to allocate +and register a device. + + +Device Naming and Driver Binding +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The platform_device.dev.bus_id is the canonical name for the devices. +It's built from two components: + + * platform_device.name ... which is also used to for driver matching. + + * platform_device.id ... the device instance number, or else "-1" + to indicate there's only one. + +These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and +"serial/3" indicates bus_id "serial.3"; both would use the platform_driver +named "serial". While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id) +and use the platform_driver called "my_rtc". + +Driver binding is performed automatically by the driver core, invoking +driver probe() after finding a match between device and driver. If the +probe() succeeds, the driver and device are bound as usual. There are +three different ways to find such a match: + + - Whenever a device is registered, the drivers for that bus are + checked for matches. Platform devices should be registered very + early during system boot. + + - When a driver is registered using platform_driver_register(), all + unbound devices on that bus are checked for matches. Drivers + usually register later during booting, or by module loading. + + - Registering a driver using platform_driver_probe() works just like + using platform_driver_register(), except that the driver won't + be probed later if another device registers. (Which is OK, since + this interface is only for use with non-hotpluggable devices.) + + +Early Platform Devices and Drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The early platform interfaces provide platform data to platform device +drivers early on during the system boot. The code is built on top of the +early_param() command line parsing and can be executed very early on. + +Example: "earlyprintk" class early serial console in 6 steps + +1. Registering early platform device data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code registers platform device data using the function +early_platform_add_devices(). In the case of early serial console this +should be hardware configuration for the serial port. Devices registered +at this point will later on be matched against early platform drivers. + +2. Parsing kernel command line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls parse_early_param() to parse the kernel +command line. This will execute all matching early_param() callbacks. +User specified early platform devices will be registered at this point. +For the early serial console case the user can specify port on the +kernel command line as "earlyprintk=serial.0" where "earlyprintk" is +the class string, "serial" is the name of the platform driver and +0 is the platform device id. If the id is -1 then the dot and the +id can be omitted. + +3. Installing early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code may optionally force registration of all early +platform drivers belonging to a certain class using the function +early_platform_driver_register_all(). User specified devices from +step 2 have priority over these. This step is omitted by the serial +driver example since the early serial driver code should be disabled +unless the user has specified port on the kernel command line. + +4. Early platform driver registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Compiled-in platform drivers making use of early_platform_init() are +automatically registered during step 2 or 3. The serial driver example +should use early_platform_init("earlyprintk", &platform_driver). + +5. Probing of early platform drivers belonging to a certain class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The architecture code calls early_platform_driver_probe() to match +registered early platform devices associated with a certain class with +registered early platform drivers. Matched devices will get probed(). +This step can be executed at any point during the early boot. As soon +as possible may be good for the serial port case. + +6. Inside the early platform driver probe() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver code needs to take special care during early boot, especially +when it comes to memory allocation and interrupt registration. The code +in the probe() function can use is_early_platform_device() to check if +it is called at early platform device or at the regular platform device +time. The early serial driver performs register_console() at this point. + +For further information, see . diff --git a/Documentation/driver-api/driver-model/porting.rst b/Documentation/driver-api/driver-model/porting.rst new file mode 100644 index 000000000..931ea879a --- /dev/null +++ b/Documentation/driver-api/driver-model/porting.rst @@ -0,0 +1,448 @@ +======================================= +Porting Drivers to the New Driver Model +======================================= + +Patrick Mochel + +7 January 2003 + + +Overview + +Please refer to `Documentation/driver-api/driver-model/*.rst` for definitions of +various driver types and concepts. + +Most of the work of porting devices drivers to the new model happens +at the bus driver layer. This was intentional, to minimize the +negative effect on kernel drivers, and to allow a gradual transition +of bus drivers. + +In a nutshell, the driver model consists of a set of objects that can +be embedded in larger, bus-specific objects. Fields in these generic +objects can replace fields in the bus-specific objects. + +The generic objects must be registered with the driver model core. By +doing so, they will exported via the sysfs filesystem. sysfs can be +mounted by doing:: + + # mount -t sysfs sysfs /sys + + + +The Process + +Step 0: Read include/linux/device.h for object and function definitions. + +Step 1: Registering the bus driver. + + +- Define a struct bus_type for the bus driver:: + + struct bus_type pci_bus_type = { + .name = "pci", + }; + + +- Register the bus type. + + This should be done in the initialization function for the bus type, + which is usually the module_init(), or equivalent, function:: + + static int __init pci_driver_init(void) + { + return bus_register(&pci_bus_type); + } + + subsys_initcall(pci_driver_init); + + + The bus type may be unregistered (if the bus driver may be compiled + as a module) by doing:: + + bus_unregister(&pci_bus_type); + + +- Export the bus type for others to use. + + Other code may wish to reference the bus type, so declare it in a + shared header file and export the symbol. + +From include/linux/pci.h:: + + extern struct bus_type pci_bus_type; + + +From file the above code appears in:: + + EXPORT_SYMBOL(pci_bus_type); + + + +- This will cause the bus to show up in /sys/bus/pci/ with two + subdirectories: 'devices' and 'drivers':: + + # tree -d /sys/bus/pci/ + /sys/bus/pci/ + |-- devices + `-- drivers + + + +Step 2: Registering Devices. + +struct device represents a single device. It mainly contains metadata +describing the relationship the device has to other entities. + + +- Embed a struct device in the bus-specific device type:: + + + struct pci_dev { + ... + struct device dev; /* Generic device interface */ + ... + }; + + It is recommended that the generic device not be the first item in + the struct to discourage programmers from doing mindless casts + between the object types. Instead macros, or inline functions, + should be created to convert from the generic object type:: + + + #define to_pci_dev(n) container_of(n, struct pci_dev, dev) + + or + + static inline struct pci_dev * to_pci_dev(struct kobject * kobj) + { + return container_of(n, struct pci_dev, dev); + } + + This allows the compiler to verify type-safety of the operations + that are performed (which is Good). + + +- Initialize the device on registration. + + When devices are discovered or registered with the bus type, the + bus driver should initialize the generic device. The most important + things to initialize are the bus_id, parent, and bus fields. + + The bus_id is an ASCII string that contains the device's address on + the bus. The format of this string is bus-specific. This is + necessary for representing devices in sysfs. + + parent is the physical parent of the device. It is important that + the bus driver sets this field correctly. + + The driver model maintains an ordered list of devices that it uses + for power management. This list must be in order to guarantee that + devices are shutdown before their physical parents, and vice versa. + The order of this list is determined by the parent of registered + devices. + + Also, the location of the device's sysfs directory depends on a + device's parent. sysfs exports a directory structure that mirrors + the device hierarchy. Accurately setting the parent guarantees that + sysfs will accurately represent the hierarchy. + + The device's bus field is a pointer to the bus type the device + belongs to. This should be set to the bus_type that was declared + and initialized before. + + Optionally, the bus driver may set the device's name and release + fields. + + The name field is an ASCII string describing the device, like + + "ATI Technologies Inc Radeon QD" + + The release field is a callback that the driver model core calls + when the device has been removed, and all references to it have + been released. More on this in a moment. + + +- Register the device. + + Once the generic device has been initialized, it can be registered + with the driver model core by doing:: + + device_register(&dev->dev); + + It can later be unregistered by doing:: + + device_unregister(&dev->dev); + + This should happen on buses that support hotpluggable devices. + If a bus driver unregisters a device, it should not immediately free + it. It should instead wait for the driver model core to call the + device's release method, then free the bus-specific object. + (There may be other code that is currently referencing the device + structure, and it would be rude to free the device while that is + happening). + + + When the device is registered, a directory in sysfs is created. + The PCI tree in sysfs looks like:: + + /sys/devices/pci0/ + |-- 00:00.0 + |-- 00:01.0 + | `-- 01:00.0 + |-- 00:02.0 + | `-- 02:1f.0 + | `-- 03:00.0 + |-- 00:1e.0 + | `-- 04:04.0 + |-- 00:1f.0 + |-- 00:1f.1 + | |-- ide0 + | | |-- 0.0 + | | `-- 0.1 + | `-- ide1 + | `-- 1.0 + |-- 00:1f.2 + |-- 00:1f.3 + `-- 00:1f.5 + + Also, symlinks are created in the bus's 'devices' directory + that point to the device's directory in the physical hierarchy:: + + /sys/bus/pci/devices/ + |-- 00:00.0 -> ../../../devices/pci0/00:00.0 + |-- 00:01.0 -> ../../../devices/pci0/00:01.0 + |-- 00:02.0 -> ../../../devices/pci0/00:02.0 + |-- 00:1e.0 -> ../../../devices/pci0/00:1e.0 + |-- 00:1f.0 -> ../../../devices/pci0/00:1f.0 + |-- 00:1f.1 -> ../../../devices/pci0/00:1f.1 + |-- 00:1f.2 -> ../../../devices/pci0/00:1f.2 + |-- 00:1f.3 -> ../../../devices/pci0/00:1f.3 + |-- 00:1f.5 -> ../../../devices/pci0/00:1f.5 + |-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0 + |-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0 + |-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0 + `-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0 + + + +Step 3: Registering Drivers. + +struct device_driver is a simple driver structure that contains a set +of operations that the driver model core may call. + + +- Embed a struct device_driver in the bus-specific driver. + + Just like with devices, do something like:: + + struct pci_driver { + ... + struct device_driver driver; + }; + + +- Initialize the generic driver structure. + + When the driver registers with the bus (e.g. doing pci_register_driver()), + initialize the necessary fields of the driver: the name and bus + fields. + + +- Register the driver. + + After the generic driver has been initialized, call:: + + driver_register(&drv->driver); + + to register the driver with the core. + + When the driver is unregistered from the bus, unregister it from the + core by doing:: + + driver_unregister(&drv->driver); + + Note that this will block until all references to the driver have + gone away. Normally, there will not be any. + + +- Sysfs representation. + + Drivers are exported via sysfs in their bus's 'driver's directory. + For example:: + + /sys/bus/pci/drivers/ + |-- 3c59x + |-- Ensoniq AudioPCI + |-- agpgart-amdk7 + |-- e100 + `-- serial + + +Step 4: Define Generic Methods for Drivers. + +struct device_driver defines a set of operations that the driver model +core calls. Most of these operations are probably similar to +operations the bus already defines for drivers, but taking different +parameters. + +It would be difficult and tedious to force every driver on a bus to +simultaneously convert their drivers to generic format. Instead, the +bus driver should define single instances of the generic methods that +forward call to the bus-specific drivers. For instance:: + + + static int pci_device_remove(struct device * dev) + { + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + + if (drv) { + if (drv->remove) + drv->remove(pci_dev); + pci_dev->driver = NULL; + } + return 0; + } + + +The generic driver should be initialized with these methods before it +is registered:: + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &pci_bus_type; + drv->driver.probe = pci_device_probe; + drv->driver.resume = pci_device_resume; + drv->driver.suspend = pci_device_suspend; + drv->driver.remove = pci_device_remove; + + /* register with core */ + driver_register(&drv->driver); + + +Ideally, the bus should only initialize the fields if they are not +already set. This allows the drivers to implement their own generic +methods. + + +Step 5: Support generic driver binding. + +The model assumes that a device or driver can be dynamically +registered with the bus at any time. When registration happens, +devices must be bound to a driver, or drivers must be bound to all +devices that it supports. + +A driver typically contains a list of device IDs that it supports. The +bus driver compares these IDs to the IDs of devices registered with it. +The format of the device IDs, and the semantics for comparing them are +bus-specific, so the generic model does attempt to generalize them. + +Instead, a bus may supply a method in struct bus_type that does the +comparison:: + + int (*match)(struct device * dev, struct device_driver * drv); + +match should return positive value if the driver supports the device, +and zero otherwise. It may also return error code (for example +-EPROBE_DEFER) if determining that given driver supports the device is +not possible. + +When a device is registered, the bus's list of drivers is iterated +over. bus->match() is called for each one until a match is found. + +When a driver is registered, the bus's list of devices is iterated +over. bus->match() is called for each device that is not already +claimed by a driver. + +When a device is successfully bound to a driver, device->driver is +set, the device is added to a per-driver list of devices, and a +symlink is created in the driver's sysfs directory that points to the +device's physical directory:: + + /sys/bus/pci/drivers/ + |-- 3c59x + | `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0 + |-- Ensoniq AudioPCI + |-- agpgart-amdk7 + | `-- 00:00.0 -> ../../../../devices/pci0/00:00.0 + |-- e100 + | `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0 + `-- serial + + +This driver binding should replace the existing driver binding +mechanism the bus currently uses. + + +Step 6: Supply a hotplug callback. + +Whenever a device is registered with the driver model core, the +userspace program /sbin/hotplug is called to notify userspace. +Users can define actions to perform when a device is inserted or +removed. + +The driver model core passes several arguments to userspace via +environment variables, including + +- ACTION: set to 'add' or 'remove' +- DEVPATH: set to the device's physical path in sysfs. + +A bus driver may also supply additional parameters for userspace to +consume. To do this, a bus must implement the 'hotplug' method in +struct bus_type:: + + int (*hotplug) (struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); + +This is called immediately before /sbin/hotplug is executed. + + +Step 7: Cleaning up the bus driver. + +The generic bus, device, and driver structures provide several fields +that can replace those defined privately to the bus driver. + +- Device list. + +struct bus_type contains a list of all devices registered with the bus +type. This includes all devices on all instances of that bus type. +An internal list that the bus uses may be removed, in favor of using +this one. + +The core provides an iterator to access these devices:: + + int bus_for_each_dev(struct bus_type * bus, struct device * start, + void * data, int (*fn)(struct device *, void *)); + + +- Driver list. + +struct bus_type also contains a list of all drivers registered with +it. An internal list of drivers that the bus driver maintains may +be removed in favor of using the generic one. + +The drivers may be iterated over, like devices:: + + int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, + void * data, int (*fn)(struct device_driver *, void *)); + + +Please see drivers/base/bus.c for more information. + + +- rwsem + +struct bus_type contains an rwsem that protects all core accesses to +the device and driver lists. This can be used by the bus driver +internally, and should be used when accessing the device or driver +lists the bus maintains. + + +- Device and driver fields. + +Some of the fields in struct device and struct device_driver duplicate +fields in the bus-specific representations of these objects. Feel free +to remove the bus-specific ones and favor the generic ones. Note +though, that this will likely mean fixing up all the drivers that +reference the bus-specific fields (though those should all be 1-line +changes). diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst new file mode 100644 index 000000000..7f74e301f --- /dev/null +++ b/Documentation/driver-api/early-userspace/buffer-format.rst @@ -0,0 +1,119 @@ +======================= +initramfs buffer format +======================= + +Al Viro, H. Peter Anvin + +Last revision: 2002-01-13 + +Starting with kernel 2.5.x, the old "initial ramdisk" protocol is +getting {replaced/complemented} with the new "initial ramfs" +(initramfs) protocol. The initramfs contents is passed using the same +memory buffer protocol used by the initrd protocol, but the contents +is different. The initramfs buffer contains an archive which is +expanded into a ramfs filesystem; this document details the format of +the initramfs buffer format. + +The initramfs buffer format is based around the "newc" or "crc" CPIO +formats, and can be created with the cpio(1) utility. The cpio +archive can be compressed using gzip(1). One valid version of an +initramfs buffer is thus a single .cpio.gz file. + +The full format of the initramfs buffer is defined by the following +grammar, where:: + + * is used to indicate "0 or more occurrences of" + (|) indicates alternatives + + indicates concatenation + GZIP() indicates the gzip(1) of the operand + ALGN(n) means padding with null bytes to an n-byte boundary + + initramfs := ("\0" | cpio_archive | cpio_gzip_archive)* + + cpio_gzip_archive := GZIP(cpio_archive) + + cpio_archive := cpio_file* + ( | cpio_trailer) + + cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data + + cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4) + + +In human terms, the initramfs buffer contains a collection of +compressed and/or uncompressed cpio archives (in the "newc" or "crc" +formats); arbitrary amounts zero bytes (for padding) can be added +between members. + +The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is +not ignored; see "handling of hard links" below. + +The structure of the cpio_header is as follows (all fields contain +hexadecimal ASCII numbers fully padded with '0' on the left to the +full width of the field, for example, the integer 4780 is represented +by the ASCII string "000012ac"): + +============= ================== ============================================== +Field name Field size Meaning +============= ================== ============================================== +c_magic 6 bytes The string "070701" or "070702" +c_ino 8 bytes File inode number +c_mode 8 bytes File mode and permissions +c_uid 8 bytes File uid +c_gid 8 bytes File gid +c_nlink 8 bytes Number of links +c_mtime 8 bytes Modification time +c_filesize 8 bytes Size of data field +c_maj 8 bytes Major part of file device number +c_min 8 bytes Minor part of file device number +c_rmaj 8 bytes Major part of device node reference +c_rmin 8 bytes Minor part of device node reference +c_namesize 8 bytes Length of filename, including final \0 +c_chksum 8 bytes Checksum of data field if c_magic is 070702; + otherwise zero +============= ================== ============================================== + +The c_mode field matches the contents of st_mode returned by stat(2) +on Linux, and encodes the file type and file permissions. + +The c_filesize should be zero for any file which is not a regular file +or symlink. + +The c_chksum field contains a simple 32-bit unsigned sum of all the +bytes in the data field. cpio(1) refers to this as "crc", which is +clearly incorrect (a cyclic redundancy check is a different and +significantly stronger integrity check), however, this is the +algorithm used. + +If the filename is "TRAILER!!!" this is actually an end-of-archive +marker; the c_filesize for an end-of-archive marker must be zero. + + +Handling of hard links +====================== + +When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino) +tuple is looked up in a tuple buffer. If not found, it is entered in +the tuple buffer and the entry is created as usual; if found, a hard +link rather than a second copy of the file is created. It is not +necessary (but permitted) to include a second copy of the file +contents; if the file contents is not included, the c_filesize field +should be set to zero to indicate no data section follows. If data is +present, the previous instance of the file is overwritten; this allows +the data-carrying instance of a file to occur anywhere in the sequence +(GNU cpio is reported to attach the data to the last instance of a +file only.) + +c_filesize must not be zero for a symlink. + +When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is +reset. This permits archives which are generated independently to be +concatenated. + +To combine file data from different sources (without having to +regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of +the following techniques can be used: + +a) Separate the different file data sources with a "TRAILER!!!" + end-of-archive marker, or + +b) Make sure c_nlink == 1 for all nondirectory entries. diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst new file mode 100644 index 000000000..61bdeac1b --- /dev/null +++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst @@ -0,0 +1,154 @@ +======================= +Early userspace support +======================= + +Last update: 2004-12-20 tlh + + +"Early userspace" is a set of libraries and programs that provide +various pieces of functionality that are important enough to be +available while a Linux kernel is coming up, but that don't need to be +run inside the kernel itself. + +It consists of several major infrastructure components: + +- gen_init_cpio, a program that builds a cpio-format archive + containing a root filesystem image. This archive is compressed, and + the compressed image is linked into the kernel image. +- initramfs, a chunk of code that unpacks the compressed cpio image + midway through the kernel boot process. +- klibc, a userspace C library, currently packaged separately, that is + optimized for correctness and small size. + +The cpio file format used by initramfs is the "newc" (aka "cpio -H newc") +format, and is documented in the file "buffer-format.txt". There are +two ways to add an early userspace image: specify an existing cpio +archive to be used as the image or have the kernel build process build +the image from specifications. + +CPIO ARCHIVE method +------------------- + +You can create a cpio archive that contains the early userspace image. +Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it +will be used directly. Only a single cpio file may be specified in +CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in +combination with a cpio archive. + +IMAGE BUILDING method +--------------------- + +The kernel build process can also build an early userspace image from +source parts rather than supplying a cpio archive. This method provides +a way to create images with root-owned files even though the image was +built by an unprivileged user. + +The image is specified as one or more sources in +CONFIG_INITRAMFS_SOURCE. Sources can be either directories or files - +cpio archives are *not* allowed when building from sources. + +A source directory will have it and all of its contents packaged. The +specified directory name will be mapped to '/'. When packaging a +directory, limited user and group ID translation can be performed. +INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to +user root (0). INITRAMFS_ROOT_GID can be set to a group ID that needs +to be mapped to group root (0). + +A source file must be directives in the format required by the +usr/gen_init_cpio utility (run 'usr/gen_init_cpio -h' to get the +file format). The directives in the file will be passed directly to +usr/gen_init_cpio. + +When a combination of directories and files are specified then the +initramfs image will be an aggregate of all of them. In this way a user +can create a 'root-image' directory and install all files into it. +Because device-special files cannot be created by a unprivileged user, +special files can be listed in a 'root-files' file. Both 'root-image' +and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete +early userspace image can be built by an unprivileged user. + +As a technical note, when directories and files are specified, the +entire CONFIG_INITRAMFS_SOURCE is passed to +usr/gen_initramfs.sh. This means that CONFIG_INITRAMFS_SOURCE +can really be interpreted as any legal argument to +gen_initramfs.sh. If a directory is specified as an argument then +the contents are scanned, uid/gid translation is performed, and +usr/gen_init_cpio file directives are output. If a directory is +specified as an argument to usr/gen_initramfs.sh then the +contents of the file are simply copied to the output. All of the output +directives from directory scanning and file contents copying are +processed by usr/gen_init_cpio. + +See also 'usr/gen_initramfs.sh -h'. + +Where's this all leading? +========================= + +The klibc distribution contains some of the necessary software to make +early userspace useful. The klibc distribution is currently +maintained separately from the kernel. + +You can obtain somewhat infrequent snapshots of klibc from +https://www.kernel.org/pub/linux/libs/klibc/ + +For active users, you are better off using the klibc git +repository, at https://git.kernel.org/?p=libs/klibc/klibc.git + +The standalone klibc distribution currently provides three components, +in addition to the klibc library: + +- ipconfig, a program that configures network interfaces. It can + configure them statically, or use DHCP to obtain information + dynamically (aka "IP autoconfiguration"). +- nfsmount, a program that can mount an NFS filesystem. +- kinit, the "glue" that uses ipconfig and nfsmount to replace the old + support for IP autoconfig, mount a filesystem over NFS, and continue + system boot using that filesystem as root. + +kinit is built as a single statically linked binary to save space. + +Eventually, several more chunks of kernel functionality will hopefully +move to early userspace: + +- Almost all of init/do_mounts* (the beginning of this is already in + place) +- ACPI table parsing +- Insert unwieldy subsystem that doesn't really need to be in kernel + space here + +If kinit doesn't meet your current needs and you've got bytes to burn, +the klibc distribution includes a small Bourne-compatible shell (ash) +and a number of other utilities, so you can replace kinit and build +custom initramfs images that meet your needs exactly. + +For questions and help, you can sign up for the early userspace +mailing list at https://www.zytor.com/mailman/listinfo/klibc + +How does it work? +================= + +The kernel has currently 3 ways to mount the root filesystem: + +a) all required device and filesystem drivers compiled into the kernel, no + initrd. init/main.c:init() will call prepare_namespace() to mount the + final root filesystem, based on the root= option and optional init= to run + some other init binary than listed at the end of init/main.c:init(). + +b) some device and filesystem drivers built as modules and stored in an + initrd. The initrd must contain a binary '/linuxrc' which is supposed to + load these driver modules. It is also possible to mount the final root + filesystem via linuxrc and use the pivot_root syscall. The initrd is + mounted and executed via prepare_namespace(). + +c) using initramfs. The call to prepare_namespace() must be skipped. + This means that a binary must do all the work. Said binary can be stored + into initramfs either via modifying usr/gen_init_cpio.c or via the new + initrd format, an cpio archive. It must be called "/init". This binary + is responsible to do all the things prepare_namespace() would do. + + To maintain backwards compatibility, the /init binary will only run if it + comes via an initramfs cpio archive. If this is not the case, + init/main.c:init() will run prepare_namespace() to mount the final root + and exec one of the predefined init binaries. + +Bryan O'Sullivan diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst new file mode 100644 index 000000000..149c1822f --- /dev/null +++ b/Documentation/driver-api/early-userspace/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Early Userspace +=============== + +.. toctree:: + :maxdepth: 1 + + early_userspace_support + buffer-format + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst new file mode 100644 index 000000000..f4f044b95 --- /dev/null +++ b/Documentation/driver-api/edac.rst @@ -0,0 +1,298 @@ +Error Detection And Correction (EDAC) Devices +============================================= + +Main Concepts used at the EDAC subsystem +---------------------------------------- + +There are several things to be aware of that aren't at all obvious, like +*sockets, *socket sets*, *banks*, *rows*, *chip-select rows*, *channels*, +etc... + +These are some of the many terms that are thrown about that don't always +mean what people think they mean (Inconceivable!). In the interest of +creating a common ground for discussion, terms and their definitions +will be established. + +* Memory devices + +The individual DRAM chips on a memory stick. These devices commonly +output 4 and 8 bits each (x4, x8). Grouping several of these in parallel +provides the number of bits that the memory controller expects: +typically 72 bits, in order to provide 64 bits + 8 bits of ECC data. + +* Memory Stick + +A printed circuit board that aggregates multiple memory devices in +parallel. In general, this is the Field Replaceable Unit (FRU) which +gets replaced, in the case of excessive errors. Most often it is also +called DIMM (Dual Inline Memory Module). + +* Memory Socket + +A physical connector on the motherboard that accepts a single memory +stick. Also called as "slot" on several datasheets. + +* Channel + +A memory controller channel, responsible to communicate with a group of +DIMMs. Each channel has its own independent control (command) and data +bus, and can be used independently or grouped with other channels. + +* Branch + +It is typically the highest hierarchy on a Fully-Buffered DIMM memory +controller. Typically, it contains two channels. Two channels at the +same branch can be used in single mode or in lockstep mode. When +lockstep is enabled, the cacheline is doubled, but it generally brings +some performance penalty. Also, it is generally not possible to point to +just one memory stick when an error occurs, as the error correction code +is calculated using two DIMMs instead of one. Due to that, it is capable +of correcting more errors than on single mode. + +* Single-channel + +The data accessed by the memory controller is contained into one dimm +only. E. g. if the data is 64 bits-wide, the data flows to the CPU using +one 64 bits parallel access. Typically used with SDR, DDR, DDR2 and DDR3 +memories. FB-DIMM and RAMBUS use a different concept for channel, so +this concept doesn't apply there. + +* Double-channel + +The data size accessed by the memory controller is interlaced into two +dimms, accessed at the same time. E. g. if the DIMM is 64 bits-wide (72 +bits with ECC), the data flows to the CPU using a 128 bits parallel +access. + +* Chip-select row + +This is the name of the DRAM signal used to select the DRAM ranks to be +accessed. Common chip-select rows for single channel are 64 bits, for +dual channel 128 bits. It may not be visible by the memory controller, +as some DIMM types have a memory buffer that can hide direct access to +it from the Memory Controller. + +* Single-Ranked stick + +A Single-ranked stick has 1 chip-select row of memory. Motherboards +commonly drive two chip-select pins to a memory stick. A single-ranked +stick, will occupy only one of those rows. The other will be unused. + +.. _doubleranked: + +* Double-Ranked stick + +A double-ranked stick has two chip-select rows which access different +sets of memory devices. The two rows cannot be accessed concurrently. + +* Double-sided stick + +**DEPRECATED TERM**, see :ref:`Double-Ranked stick `. + +A double-sided stick has two chip-select rows which access different sets +of memory devices. The two rows cannot be accessed concurrently. +"Double-sided" is irrespective of the memory devices being mounted on +both sides of the memory stick. + +* Socket set + +All of the memory sticks that are required for a single memory access or +all of the memory sticks spanned by a chip-select row. A single socket +set has two chip-select rows and if double-sided sticks are used these +will occupy those chip-select rows. + +* Bank + +This term is avoided because it is unclear when needing to distinguish +between chip-select rows and socket sets. + +* High Bandwidth Memory (HBM) + +HBM is a new memory type with low power consumption and ultra-wide +communication lanes. It uses vertically stacked memory chips (DRAM dies) +interconnected by microscopic wires called "through-silicon vias," or +TSVs. + +Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast +interconnect called the "interposer". Therefore, HBM's characteristics +are nearly indistinguishable from on-chip integrated RAM. + +Memory Controllers +------------------ + +Most of the EDAC core is focused on doing Memory Controller error detection. +The :c:func:`edac_mc_alloc`. It uses internally the struct ``mem_ctl_info`` +to describe the memory controllers, with is an opaque struct for the EDAC +drivers. Only the EDAC core is allowed to touch it. + +.. kernel-doc:: include/linux/edac.h + +.. kernel-doc:: drivers/edac/edac_mc.h + +PCI Controllers +--------------- + +The EDAC subsystem provides a mechanism to handle PCI controllers by calling +the :c:func:`edac_pci_alloc_ctl_info`. It will use the struct +:c:type:`edac_pci_ctl_info` to describe the PCI controllers. + +.. kernel-doc:: drivers/edac/edac_pci.h + +EDAC Blocks +----------- + +The EDAC subsystem also provides a generic mechanism to report errors on +other parts of the hardware via :c:func:`edac_device_alloc_ctl_info` function. + +The structures :c:type:`edac_dev_sysfs_block_attribute`, +:c:type:`edac_device_block`, :c:type:`edac_device_instance` and +:c:type:`edac_device_ctl_info` provide a generic or abstract 'edac_device' +representation at sysfs. + +This set of structures and the code that implements the APIs for the same, provide for registering EDAC type devices which are NOT standard memory or +PCI, like: + +- CPU caches (L1 and L2) +- DMA engines +- Core CPU switches +- Fabric switch units +- PCIe interface controllers +- other EDAC/ECC type devices that can be monitored for + errors, etc. + +It allows for a 2 level set of hierarchy. + +For example, a cache could be composed of L1, L2 and L3 levels of cache. +Each CPU core would have its own L1 cache, while sharing L2 and maybe L3 +caches. On such case, those can be represented via the following sysfs +nodes:: + + /sys/devices/system/edac/.. + + pci/ + mc/ + cpu/cpu0/.. + /L1-cache/ce_count + /ue_count + /L2-cache/ce_count + /ue_count + cpu/cpu1/.. + /L1-cache/ce_count + /ue_count + /L2-cache/ce_count + /ue_count + ... + + the L1 and L2 directories would be "edac_device_block's" + +.. kernel-doc:: drivers/edac/edac_device.h + + +Heterogeneous system support +---------------------------- + +An AMD heterogeneous system is built by connecting the data fabrics of +both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the +GPU nodes can be accessed the same way as the data fabric on CPU nodes. + +The MI200 accelerators are data center GPUs. They have 2 data fabrics, +and each GPU data fabric contains four Unified Memory Controllers (UMC). +Each UMC contains eight channels. Each UMC channel controls one 128-bit +HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total +of 4096-bits of DRAM data bus. + +While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC +channel is interfacing 2GB of DRAM (represented as rank). + +Memory controllers on AMD GPU nodes can be represented in EDAC thusly: + + GPU DF / GPU Node -> EDAC MC + GPU UMC -> EDAC CSROW + GPU UMC channel -> EDAC CHANNEL + +For example: a heterogeneous system with 1 AMD CPU is connected to +4 MI200 (Aldebaran) GPUs using xGMI. + +Some more heterogeneous hardware details: + +- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC. + They have chip selects (csrows) and channels. However, the layouts are different + for performance, physical layout, or other reasons. +- CPU UMCs use 1 channel, In this case UMC = EDAC channel. This follows the + marketing speak. CPU has X memory channels, etc. +- CPU UMCs use up to 4 chip selects, So UMC chip select = EDAC CSROW. +- GPU UMCs use 1 chip select, So UMC = EDAC CSROW. +- GPU UMCs use 8 channels, So UMC channel = EDAC channel. + +The EDAC subsystem provides a mechanism to handle AMD heterogeneous +systems by calling system specific ops for both CPUs and GPUs. + +AMD GPU nodes are enumerated in sequential order based on the PCI +hierarchy, and the first GPU node is assumed to have a Node ID value +following those of the CPU nodes after latter are fully populated:: + + $ ls /sys/devices/system/edac/mc/ + mc0 - CPU MC node 0 + mc1 | + mc2 |- GPU card[0] => node 0(mc1), node 1(mc2) + mc3 | + mc4 |- GPU card[1] => node 0(mc3), node 1(mc4) + mc5 | + mc6 |- GPU card[2] => node 0(mc5), node 1(mc6) + mc7 | + mc8 |- GPU card[3] => node 0(mc7), node 1(mc8) + +For example, a heterogeneous system with one AMD CPU is connected to +four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented +via the following sysfs entries:: + + /sys/devices/system/edac/mc/.. + + CPU # CPU node + ├── mc 0 + + GPU Nodes are enumerated sequentially after CPU nodes have been populated + GPU card 1 # Each MI200 GPU has 2 nodes/mcs + ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs + │   ├── csrow 0 # UMC 0 + │   │   ├── channel 0 # Each UMC has 8 channels + │   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB + │   │   ├── channel 2 + │   │   ├── channel 3 + │   │   ├── channel 4 + │   │   ├── channel 5 + │   │   ├── channel 6 + │   │   ├── channel 7 + │   ├── csrow 1 # UMC 1 + │   │   ├── channel 0 + │   │   ├── .. + │   │   ├── channel 7 + │   ├── .. .. + │   ├── csrow 3 # UMC 3 + │   │   ├── channel 0 + │   │   ├── .. + │   │   ├── channel 7 + │   ├── rank 0 + │   ├── .. .. + │   ├── rank 31 # total 32 ranks/dimms from 4 UMCs + ├ + ├── mc 2 # GPU node 1 == mc2 + │   ├── .. # each GPU has total 64 GB + + GPU card 2 + ├── mc 3 + │   ├── .. + ├── mc 4 + │   ├── .. + + GPU card 3 + ├── mc 5 + │   ├── .. + ├── mc 6 + │   ├── .. + + GPU card 4 + ├── mc 7 + │   ├── .. + ├── mc 8 + │   ├── .. diff --git a/Documentation/driver-api/eisa.rst b/Documentation/driver-api/eisa.rst new file mode 100644 index 000000000..3eac11b7e --- /dev/null +++ b/Documentation/driver-api/eisa.rst @@ -0,0 +1,230 @@ +================ +EISA bus support +================ + +:Author: Marc Zyngier + +This document groups random notes about porting EISA drivers to the +new EISA/sysfs API. + +Starting from version 2.5.59, the EISA bus is almost given the same +status as other much more mainstream busses such as PCI or USB. This +has been possible through sysfs, which defines a nice enough set of +abstractions to manage busses, devices and drivers. + +Although the new API is quite simple to use, converting existing +drivers to the new infrastructure is not an easy task (mostly because +detection code is generally also used to probe ISA cards). Moreover, +most EISA drivers are among the oldest Linux drivers so, as you can +imagine, some dust has settled here over the years. + +The EISA infrastructure is made up of three parts: + + - The bus code implements most of the generic code. It is shared + among all the architectures that the EISA code runs on. It + implements bus probing (detecting EISA cards available on the bus), + allocates I/O resources, allows fancy naming through sysfs, and + offers interfaces for driver to register. + + - The bus root driver implements the glue between the bus hardware + and the generic bus code. It is responsible for discovering the + device implementing the bus, and setting it up to be latter probed + by the bus code. This can go from something as simple as reserving + an I/O region on x86, to the rather more complex, like the hppa + EISA code. This is the part to implement in order to have EISA + running on an "new" platform. + + - The driver offers the bus a list of devices that it manages, and + implements the necessary callbacks to probe and release devices + whenever told to. + +Every function/structure below lives in , which depends +heavily on . + +Bus root driver +=============== + +:: + + int eisa_root_register (struct eisa_root_device *root); + +The eisa_root_register function is used to declare a device as the +root of an EISA bus. The eisa_root_device structure holds a reference +to this device, as well as some parameters for probing purposes:: + + struct eisa_root_device { + struct device *dev; /* Pointer to bridge device */ + struct resource *res; + unsigned long bus_base_addr; + int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ + int bus_nr; /* Set by eisa_root_register */ + struct resource eisa_root_res; /* ditto */ + }; + +============= ====================================================== +node used for eisa_root_register internal purpose +dev pointer to the root device +res root device I/O resource +bus_base_addr slot 0 address on this bus +slots max slot number to probe +force_probe Probe even when slot 0 is empty (no EISA mainboard) +dma_mask Default DMA mask. Usually the bridge device dma_mask. +bus_nr unique bus id, set by eisa_root_register +============= ====================================================== + +Driver +====== + +:: + + int eisa_driver_register (struct eisa_driver *edrv); + void eisa_driver_unregister (struct eisa_driver *edrv); + +Clear enough ? + +:: + + struct eisa_device_id { + char sig[EISA_SIG_LEN]; + unsigned long driver_data; + }; + + struct eisa_driver { + const struct eisa_device_id *id_table; + struct device_driver driver; + }; + +=============== ==================================================== +id_table an array of NULL terminated EISA id strings, + followed by an empty string. Each string can + optionally be paired with a driver-dependent value + (driver_data). + +driver a generic driver, such as described in + Documentation/driver-api/driver-model/driver.rst. Only .name, + .probe and .remove members are mandatory. +=============== ==================================================== + +An example is the 3c59x driver:: + + static struct eisa_device_id vortex_eisa_ids[] = { + { "TCM5920", EISA_3C592_OFFSET }, + { "TCM5970", EISA_3C597_OFFSET }, + { "" } + }; + + static struct eisa_driver vortex_eisa_driver = { + .id_table = vortex_eisa_ids, + .driver = { + .name = "3c59x", + .probe = vortex_eisa_probe, + .remove = vortex_eisa_remove + } + }; + +Device +====== + +The sysfs framework calls .probe and .remove functions upon device +discovery and removal (note that the .remove function is only called +when driver is built as a module). + +Both functions are passed a pointer to a 'struct device', which is +encapsulated in a 'struct eisa_device' described as follows:: + + struct eisa_device { + struct eisa_device_id id; + int slot; + int state; + unsigned long base_addr; + struct resource res[EISA_MAX_RESOURCES]; + u64 dma_mask; + struct device dev; /* generic device */ + }; + +======== ============================================================ +id EISA id, as read from device. id.driver_data is set from the + matching driver EISA id. +slot slot number which the device was detected on +state set of flags indicating the state of the device. Current + flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED. +res set of four 256 bytes I/O regions allocated to this device +dma_mask DMA mask set from the parent device. +dev generic device (see Documentation/driver-api/driver-model/device.rst) +======== ============================================================ + +You can get the 'struct eisa_device' from 'struct device' using the +'to_eisa_device' macro. + +Misc stuff +========== + +:: + + void eisa_set_drvdata (struct eisa_device *edev, void *data); + +Stores data into the device's driver_data area. + +:: + + void *eisa_get_drvdata (struct eisa_device *edev): + +Gets the pointer previously stored into the device's driver_data area. + +:: + + int eisa_get_region_index (void *addr); + +Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given +address. + +Kernel parameters +================= + +eisa_bus.enable_dev + A comma-separated list of slots to be enabled, even if the firmware + set the card as disabled. The driver must be able to properly + initialize the device in such conditions. + +eisa_bus.disable_dev + A comma-separated list of slots to be disabled, even if the firmware + set the card as enabled. The driver won't be called to handle this + device. + +virtual_root.force_probe + Force the probing code to probe EISA slots even when it cannot find an + EISA compliant mainboard (nothing appears on slot 0). Defaults to 0 + (don't force), and set to 1 (force probing) when either + CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. + +Random notes +============ + +Converting an EISA driver to the new API mostly involves *deleting* +code (since probing is now in the core EISA code). Unfortunately, most +drivers share their probing routine between ISA, and EISA. Special +care must be taken when ripping out the EISA code, so other busses +won't suffer from these surgical strikes... + +You *must not* expect any EISA device to be detected when returning +from eisa_driver_register, since the chances are that the bus has not +yet been probed. In fact, that's what happens most of the time (the +bus root driver usually kicks in rather late in the boot process). +Unfortunately, most drivers are doing the probing by themselves, and +expect to have explored the whole machine when they exit their probe +routine. + +For example, switching your favorite EISA SCSI card to the "hotplug" +model is "the right thing"(tm). + +Thanks +====== + +I'd like to thank the following people for their help: + +- Xavier Benigni for lending me a wonderful Alpha Jensen, +- James Bottomley, Jeff Garzik for getting this stuff into the kernel, +- Andries Brouwer for contributing numerous EISA ids, +- Catrin Jones for coping with far too many machines at home. diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst new file mode 100644 index 000000000..d3cfa73cb --- /dev/null +++ b/Documentation/driver-api/firewire.rst @@ -0,0 +1,48 @@ +=========================================== +Firewire (IEEE 1394) driver Interface Guide +=========================================== + +Introduction and Overview +========================= + +The Linux FireWire subsystem adds some interfaces into the Linux system to + use/maintain+any resource on IEEE 1394 bus. + +The main purpose of these interfaces is to access address space on each node +on IEEE 1394 bus by ISO/IEC 13213 (IEEE 1212) procedure, and to control +isochronous resources on the bus by IEEE 1394 procedure. + +Two types of interfaces are added, according to consumers of the interface. A +set of userspace interfaces is available via `firewire character devices`. A set +of kernel interfaces is available via exported symbols in `firewire-core` module. + +Firewire char device data structures +==================================== + +.. include:: ../ABI/stable/firewire-cdev + :literal: + +.. kernel-doc:: include/uapi/linux/firewire-cdev.h + :internal: + +Firewire device probing and sysfs interfaces +============================================ + +.. include:: ../ABI/stable/sysfs-bus-firewire + :literal: + +.. kernel-doc:: drivers/firewire/core-device.c + :export: + +Firewire core transaction interfaces +==================================== + +.. kernel-doc:: drivers/firewire/core-transaction.c + :export: + +Firewire Isochronous I/O interfaces +=================================== + +.. kernel-doc:: drivers/firewire/core-iso.c + :export: + diff --git a/Documentation/driver-api/firmware/built-in-fw.rst b/Documentation/driver-api/firmware/built-in-fw.rst new file mode 100644 index 000000000..bc1c961ba --- /dev/null +++ b/Documentation/driver-api/firmware/built-in-fw.rst @@ -0,0 +1,33 @@ +================= +Built-in firmware +================= + +Firmware can be built-in to the kernel, this means building the firmware +into vmlinux directly, to enable avoiding having to look for firmware from +the filesystem. Instead, firmware can be looked for inside the kernel +directly. You can enable built-in firmware using the kernel configuration +options: + + * CONFIG_EXTRA_FIRMWARE + * CONFIG_EXTRA_FIRMWARE_DIR + +There are a few reasons why you might want to consider building your firmware +into the kernel with CONFIG_EXTRA_FIRMWARE: + +* Speed +* Firmware is needed for accessing the boot device, and the user doesn't + want to stuff the firmware into the boot initramfs. + +Even if you have these needs there are a few reasons why you may not be +able to make use of built-in firmware: + +* Legalese - firmware is non-GPL compatible +* Some firmware may be optional +* Firmware upgrades are possible, therefore a new firmware would implicate + a complete kernel rebuild. +* Some firmware files may be really large in size. The remote-proc subsystem + is an example subsystem which deals with these sorts of firmware +* The firmware may need to be scraped out from some device specific location + dynamically, an example is calibration data for some WiFi chipsets. This + calibration data can be unique per sold device. + diff --git a/Documentation/driver-api/firmware/core.rst b/Documentation/driver-api/firmware/core.rst new file mode 100644 index 000000000..803cd574b --- /dev/null +++ b/Documentation/driver-api/firmware/core.rst @@ -0,0 +1,17 @@ +========================== +Firmware API core features +========================== + +The firmware API has a rich set of core features available. This section +documents these features. + +.. toctree:: + + fw_search_path + built-in-fw + firmware_cache + direct-fs-lookup + fallback-mechanisms + lookup-order + firmware-usage-guidelines + diff --git a/Documentation/driver-api/firmware/direct-fs-lookup.rst b/Documentation/driver-api/firmware/direct-fs-lookup.rst new file mode 100644 index 000000000..e04353d1b --- /dev/null +++ b/Documentation/driver-api/firmware/direct-fs-lookup.rst @@ -0,0 +1,30 @@ +======================== +Direct filesystem lookup +======================== + +Direct filesystem lookup is the most common form of firmware lookup performed +by the kernel. The kernel looks for the firmware directly on the root +filesystem in the paths documented in the section 'Firmware search paths'. +The filesystem lookup is implemented in fw_get_filesystem_firmware(), it +uses common core kernel file loader facility kernel_read_file_from_path(). +The max path allowed is PATH_MAX -- currently this is 4096 characters. + +It is recommended you keep /lib/firmware paths on your root filesystem, +avoid having a separate partition for them in order to avoid possible +races with lookups and avoid uses of the custom fallback mechanisms +documented below. + +Firmware and initramfs +---------------------- + +Drivers which are built-in to the kernel should have the firmware integrated +also as part of the initramfs used to boot the kernel given that otherwise +a race is possible with loading the driver and the real rootfs not yet being +available. Stuffing the firmware into initramfs resolves this race issue, +however note that using initrd does not suffice to address the same race. + +There are circumstances that justify not wanting to include firmware into +initramfs, such as dealing with large firmware files for the +remote-proc subsystem. For such cases using a userspace fallback mechanism +is currently the only viable solution as only userspace can know for sure +when the real rootfs is ready and mounted. diff --git a/Documentation/driver-api/firmware/efi/index.rst b/Documentation/driver-api/firmware/efi/index.rst new file mode 100644 index 000000000..4fe8abba9 --- /dev/null +++ b/Documentation/driver-api/firmware/efi/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +UEFI Support +============ + +UEFI stub library functions +=========================== + +.. kernel-doc:: drivers/firmware/efi/libstub/mem.c + :internal: diff --git a/Documentation/driver-api/firmware/fallback-mechanisms.rst b/Documentation/driver-api/firmware/fallback-mechanisms.rst new file mode 100644 index 000000000..5f04c3bcd --- /dev/null +++ b/Documentation/driver-api/firmware/fallback-mechanisms.rst @@ -0,0 +1,308 @@ +=================== +Fallback mechanisms +=================== + +A fallback mechanism is supported to allow to overcome failures to do a direct +filesystem lookup on the root filesystem or when the firmware simply cannot be +installed for practical reasons on the root filesystem. The kernel +configuration options related to supporting the firmware fallback mechanism are: + + * CONFIG_FW_LOADER_USER_HELPER: enables building the firmware fallback + mechanism. Most distributions enable this option today. If enabled but + CONFIG_FW_LOADER_USER_HELPER_FALLBACK is disabled, only the custom fallback + mechanism is available and for the request_firmware_nowait() call. + * CONFIG_FW_LOADER_USER_HELPER_FALLBACK: force enables each request to + enable the kobject uevent fallback mechanism on all firmware API calls + except request_firmware_direct(). Most distributions disable this option + today. The call request_firmware_nowait() allows for one alternative + fallback mechanism: if this kconfig option is enabled and your second + argument to request_firmware_nowait(), uevent, is set to false you are + informing the kernel that you have a custom fallback mechanism and it will + manually load the firmware. Read below for more details. + +Note that this means when having this configuration: + +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n + +the kobject uevent fallback mechanism will never take effect even +for request_firmware_nowait() when uevent is set to true. + +Justifying the firmware fallback mechanism +========================================== + +Direct filesystem lookups may fail for a variety of reasons. Known reasons for +this are worth itemizing and documenting as it justifies the need for the +fallback mechanism: + +* Race against access with the root filesystem upon bootup. + +* Races upon resume from suspend. This is resolved by the firmware cache, but + the firmware cache is only supported if you use uevents, and its not + supported for request_firmware_into_buf(). + +* Firmware is not accessible through typical means: + + * It cannot be installed into the root filesystem + * The firmware provides very unique device specific data tailored for + the unit gathered with local information. An example is calibration + data for WiFi chipsets for mobile devices. This calibration data is + not common to all units, but tailored per unit. Such information may + be installed on a separate flash partition other than where the root + filesystem is provided. + +Types of fallback mechanisms +============================ + +There are really two fallback mechanisms available using one shared sysfs +interface as a loading facility: + +* Kobject uevent fallback mechanism +* Custom fallback mechanism + +First lets document the shared sysfs loading facility. + +Firmware sysfs loading facility +=============================== + +In order to help device drivers upload firmware using a fallback mechanism +the firmware infrastructure creates a sysfs interface to enable userspace +to load and indicate when firmware is ready. The sysfs directory is created +via fw_create_instance(). This call creates a new struct device named after +the firmware requested, and establishes it in the device hierarchy by +associating the device used to make the request as the device's parent. +The sysfs directory's file attributes are defined and controlled through +the new device's class (firmware_class) and group (fw_dev_attr_groups). +This is actually where the original firmware_class module name came from, +given that originally the only firmware loading mechanism available was the +mechanism we now use as a fallback mechanism, which registers a struct class +firmware_class. Because the attributes exposed are part of the module name, the +module name firmware_class cannot be renamed in the future, to ensure backward +compatibility with old userspace. + +To load firmware using the sysfs interface we expose a loading indicator, +and a file upload firmware into: + + * /sys/$DEVPATH/loading + * /sys/$DEVPATH/data + +To upload firmware you will echo 1 onto the loading file to indicate +you are loading firmware. You then write the firmware into the data file, +and you notify the kernel the firmware is ready by echo'ing 0 onto +the loading file. + +The firmware device used to help load firmware using sysfs is only created if +direct firmware loading fails and if the fallback mechanism is enabled for your +firmware request, this is set up with :c:func:`firmware_fallback_sysfs`. It is +important to re-iterate that no device is created if a direct filesystem lookup +succeeded. + +Using:: + + echo 1 > /sys/$DEVPATH/loading + +Will clean any previous partial load at once and make the firmware API +return an error. When loading firmware the firmware_class grows a buffer +for the firmware in PAGE_SIZE increments to hold the image as it comes in. + +firmware_data_read() and firmware_loading_show() are just provided for the +test_firmware driver for testing, they are not called in normal use or +expected to be used regularly by userspace. + +firmware_fallback_sysfs +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/fallback.c + :functions: firmware_fallback_sysfs + +Firmware kobject uevent fallback mechanism +========================================== + +Since a device is created for the sysfs interface to help load firmware as a +fallback mechanism userspace can be informed of the addition of the device by +relying on kobject uevents. The addition of the device into the device +hierarchy means the fallback mechanism for firmware loading has been initiated. +For details of implementation refer to fw_load_sysfs_fallback(), in particular +on the use of dev_set_uevent_suppress() and kobject_uevent(). + +The kernel's kobject uevent mechanism is implemented in lib/kobject_uevent.c, +it issues uevents to userspace. As a supplement to kobject uevents Linux +distributions could also enable CONFIG_UEVENT_HELPER_PATH, which makes use of +core kernel's usermode helper (UMH) functionality to call out to a userspace +helper for kobject uevents. In practice though no standard distribution has +ever used the CONFIG_UEVENT_HELPER_PATH. If CONFIG_UEVENT_HELPER_PATH is +enabled this binary would be called each time kobject_uevent_env() gets called +in the kernel for each kobject uevent triggered. + +Different implementations have been supported in userspace to take advantage of +this fallback mechanism. When firmware loading was only possible using the +sysfs mechanism the userspace component "hotplug" provided the functionality of +monitoring for kobject events. Historically this was superseded be systemd's +udev, however firmware loading support was removed from udev as of systemd +commit be2ea723b1d0 ("udev: remove userspace firmware loading support") +as of v217 on August, 2014. This means most Linux distributions today are +not using or taking advantage of the firmware fallback mechanism provided +by kobject uevents. This is specially exacerbated due to the fact that most +distributions today disable CONFIG_FW_LOADER_USER_HELPER_FALLBACK. + +Refer to do_firmware_uevent() for details of the kobject event variables +setup. The variables currently passed to userspace with a "kobject add" +event are: + +* FIRMWARE=firmware name +* TIMEOUT=timeout value +* ASYNC=whether or not the API request was asynchronous + +By default DEVPATH is set by the internal kernel kobject infrastructure. +Below is an example simple kobject uevent script:: + + # Both $DEVPATH and $FIRMWARE are already provided in the environment. + MY_FW_DIR=/lib/firmware/ + echo 1 > /sys/$DEVPATH/loading + cat $MY_FW_DIR/$FIRMWARE > /sys/$DEVPATH/data + echo 0 > /sys/$DEVPATH/loading + +Firmware custom fallback mechanism +================================== + +Users of the request_firmware_nowait() call have yet another option available +at their disposal: rely on the sysfs fallback mechanism but request that no +kobject uevents be issued to userspace. The original logic behind this +was that utilities other than udev might be required to lookup firmware +in non-traditional paths -- paths outside of the listing documented in the +section 'Direct filesystem lookup'. This option is not available to any of +the other API calls as uevents are always forced for them. + +Since uevents are only meaningful if the fallback mechanism is enabled +in your kernel it would seem odd to enable uevents with kernels that do not +have the fallback mechanism enabled in their kernels. Unfortunately we also +rely on the uevent flag which can be disabled by request_firmware_nowait() to +also setup the firmware cache for firmware requests. As documented above, +the firmware cache is only set up if uevent is enabled for an API call. +Although this can disable the firmware cache for request_firmware_nowait() +calls, users of this API should not use it for the purposes of disabling +the cache as that was not the original purpose of the flag. Not setting +the uevent flag means you want to opt-in for the firmware fallback mechanism +but you want to suppress kobject uevents, as you have a custom solution which +will monitor for your device addition into the device hierarchy somehow and +load firmware for you through a custom path. + +Firmware fallback timeout +========================= + +The firmware fallback mechanism has a timeout. If firmware is not loaded +onto the sysfs interface by the timeout value an error is sent to the +driver. By default the timeout is set to 60 seconds if uevents are +desirable, otherwise MAX_JIFFY_OFFSET is used (max timeout possible). +The logic behind using MAX_JIFFY_OFFSET for non-uevents is that a custom +solution will have as much time as it needs to load firmware. + +You can customize the firmware timeout by echo'ing your desired timeout into +the following file: + +* /sys/class/firmware/timeout + +If you echo 0 into it means MAX_JIFFY_OFFSET will be used. The data type +for the timeout is an int. + +EFI embedded firmware fallback mechanism +======================================== + +On some devices the system's EFI code / ROM may contain an embedded copy +of firmware for some of the system's integrated peripheral devices and +the peripheral's Linux device-driver needs to access this firmware. + +Device drivers which need such firmware can use the +firmware_request_platform() function for this, note that this is a +separate fallback mechanism from the other fallback mechanisms and +this does not use the sysfs interface. + +A device driver which needs this can describe the firmware it needs +using an efi_embedded_fw_desc struct: + +.. kernel-doc:: include/linux/efi_embedded_fw.h + :functions: efi_embedded_fw_desc + +The EFI embedded-fw code works by scanning all EFI_BOOT_SERVICES_CODE memory +segments for an eight byte sequence matching prefix; if the prefix is found it +then does a sha256 over length bytes and if that matches makes a copy of length +bytes and adds that to its list with found firmwares. + +To avoid doing this somewhat expensive scan on all systems, dmi matching is +used. Drivers are expected to export a dmi_system_id array, with each entries' +driver_data pointing to an efi_embedded_fw_desc. + +To register this array with the efi-embedded-fw code, a driver needs to: + +1. Always be builtin to the kernel or store the dmi_system_id array in a + separate object file which always gets builtin. + +2. Add an extern declaration for the dmi_system_id array to + include/linux/efi_embedded_fw.h. + +3. Add the dmi_system_id array to the embedded_fw_table in + drivers/firmware/efi/embedded-firmware.c wrapped in a #ifdef testing that + the driver is being builtin. + +4. Add "select EFI_EMBEDDED_FIRMWARE if EFI_STUB" to its Kconfig entry. + +The firmware_request_platform() function will always first try to load firmware +with the specified name directly from the disk, so the EFI embedded-fw can +always be overridden by placing a file under /lib/firmware. + +Note that: + +1. The code scanning for EFI embedded-firmware runs near the end + of start_kernel(), just before calling rest_init(). For normal drivers and + subsystems using subsys_initcall() to register themselves this does not + matter. This means that code running earlier cannot use EFI + embedded-firmware. + +2. At the moment the EFI embedded-fw code assumes that firmwares always start at + an offset which is a multiple of 8 bytes, if this is not true for your case + send in a patch to fix this. + +3. At the moment the EFI embedded-fw code only works on x86 because other archs + free EFI_BOOT_SERVICES_CODE before the EFI embedded-fw code gets a chance to + scan it. + +4. The current brute-force scanning of EFI_BOOT_SERVICES_CODE is an ad-hoc + brute-force solution. There has been discussion to use the UEFI Platform + Initialization (PI) spec's Firmware Volume protocol. This has been rejected + because the FV Protocol relies on *internal* interfaces of the PI spec, and: + 1. The PI spec does not define peripheral firmware at all + 2. The internal interfaces of the PI spec do not guarantee any backward + compatibility. Any implementation details in FV may be subject to change, + and may vary system to system. Supporting the FV Protocol would be + difficult as it is purposely ambiguous. + +Example how to check for and extract embedded firmware +------------------------------------------------------ + +To check for, for example Silead touchscreen controller embedded firmware, +do the following: + +1. Boot the system with efi=debug on the kernel commandline + +2. cp /sys/kernel/debug/efi/boot_services_code? to your home dir + +3. Open the boot_services_code? files in a hex-editor, search for the + magic prefix for Silead firmware: F0 00 00 00 02 00 00 00, this gives you + the beginning address of the firmware inside the boot_services_code? file. + +4. The firmware has a specific pattern, it starts with a 8 byte page-address, + typically F0 00 00 00 02 00 00 00 for the first page followed by 32-bit + word-address + 32-bit value pairs. With the word-address incrementing 4 + bytes (1 word) for each pair until a page is complete. A complete page is + followed by a new page-address, followed by more word + value pairs. This + leads to a very distinct pattern. Scroll down until this pattern stops, + this gives you the end of the firmware inside the boot_services_code? file. + +5. "dd if=boot_services_code? of=firmware bs=1 skip= count=" + will extract the firmware for you. Inspect the firmware file in a + hexeditor to make sure you got the dd parameters correct. + +6. Copy it to /lib/firmware under the expected name to test it. + +7. If the extracted firmware works, you can use the found info to fill an + efi_embedded_fw_desc struct to describe it, run "sha256sum firmware" + to get the sha256sum to put in the sha256 field. diff --git a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst new file mode 100644 index 000000000..fdcfce42c --- /dev/null +++ b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst @@ -0,0 +1,44 @@ +=================== +Firmware Guidelines +=================== + +Users switching to a newer kernel should *not* have to install newer +firmware files to keep their hardware working. At the same time updated +firmware files must not cause any regressions for users of older kernel +releases. + +Drivers that use firmware from linux-firmware should follow the rules in +this guide. (Where there is limited control of the firmware, +i.e. company doesn't support Linux, firmwares sourced from misc places, +then of course these rules will not apply strictly.) + +* Firmware files shall be designed in a way that it allows checking for + firmware ABI version changes. It is recommended that firmware files be + versioned with at least a major/minor version. It is suggested that + the firmware files in linux-firmware be named with some device + specific name, and just the major version. The firmware version should + be stored in the firmware header, or as an exception, as part of the + firmware file name, in order to let the driver detact any non-ABI + fixes/changes. The firmware files in linux-firmware should be + overwritten with the newest compatible major version. Newer major + version firmware shall remain compatible with all kernels that load + that major number. + +* If the kernel support for the hardware is normally inactive, or the + hardware isn't available for public consumption, this can + be ignored, until the first kernel release that enables that hardware. + This means no major version bumps without the kernel retaining + backwards compatibility for the older major versions. Minor version + bumps should not introduce new features that newer kernels depend on + non-optionally. + +* If a security fix needs lockstep firmware and kernel fixes in order to + be successful, then all supported major versions in the linux-firmware + repo that are required by currently supported stable/LTS kernels, + should be updated with the security fix. The kernel patches should + detect if the firmware is new enough to declare if the security issue + is fixed. All communications around security fixes should point at + both the firmware and kernel fixes. If a security fix requires + deprecating old major versions, then this should only be done as a + last option, and be stated clearly in all communications. + diff --git a/Documentation/driver-api/firmware/firmware_cache.rst b/Documentation/driver-api/firmware/firmware_cache.rst new file mode 100644 index 000000000..417b9e834 --- /dev/null +++ b/Documentation/driver-api/firmware/firmware_cache.rst @@ -0,0 +1,51 @@ +============== +Firmware cache +============== + +When Linux resumes from suspend some device drivers require firmware lookups to +re-initialize devices. During resume there may be a period of time during which +firmware lookups are not possible, during this short period of time firmware +requests will fail. Time is of essence though, and delaying drivers to wait for +the root filesystem for firmware delays user experience with device +functionality. In order to support these requirements the firmware +infrastructure implements a firmware cache for device drivers for most API +calls, automatically behind the scenes. + +The firmware cache makes using certain firmware API calls safe during a device +driver's suspend and resume callback. Users of these API calls needn't cache +the firmware by themselves for dealing with firmware loss during system resume. + +The firmware cache works by requesting for firmware prior to suspend and +caching it in memory. Upon resume device drivers using the firmware API will +have access to the firmware immediately, without having to wait for the root +filesystem to mount or dealing with possible race issues with lookups as the +root filesystem mounts. + +Some implementation details about the firmware cache setup: + +* The firmware cache is setup by adding a devres entry for each device that + uses all synchronous call except :c:func:`request_firmware_into_buf`. + +* If an asynchronous call is used the firmware cache is only set up for a + device if the second argument (uevent) to request_firmware_nowait() is + true. When uevent is true it requests that a kobject uevent be sent to + userspace for the firmware request through the sysfs fallback mechanism + if the firmware file is not found. + +* If the firmware cache is determined to be needed as per the above two + criteria the firmware cache is setup by adding a devres entry for the + device making the firmware request. + +* The firmware devres entry is maintained throughout the lifetime of the + device. This means that even if you release_firmware() the firmware cache + will still be used on resume from suspend. + +* The timeout for the fallback mechanism is temporarily reduced to 10 seconds + as the firmware cache is set up during suspend, the timeout is set back to + the old value you had configured after the cache is set up. + +* Upon suspend any pending non-uevent firmware requests are killed to avoid + stalling the kernel, this is done with kill_requests_without_uevent(). Kernel + calls requiring the non-uevent therefore need to implement their own firmware + cache mechanism but must not use the firmware API on suspend. + diff --git a/Documentation/driver-api/firmware/fw_search_path.rst b/Documentation/driver-api/firmware/fw_search_path.rst new file mode 100644 index 000000000..d7cb1e8f0 --- /dev/null +++ b/Documentation/driver-api/firmware/fw_search_path.rst @@ -0,0 +1,31 @@ +===================== +Firmware search paths +===================== + +The following search paths are used to look for firmware on your +root filesystem. + +* fw_path_para - module parameter - default is empty so this is ignored +* /lib/firmware/updates/UTS_RELEASE/ +* /lib/firmware/updates/ +* /lib/firmware/UTS_RELEASE/ +* /lib/firmware/ + +The module parameter ''path'' can be passed to the firmware_class module +to activate the first optional custom fw_path_para. The custom path can +only be up to 256 characters long. The kernel parameter passed would be: + +* 'firmware_class.path=$CUSTOMIZED_PATH' + +There is an alternative to customize the path at run time after bootup, you +can use the file: + +* /sys/module/firmware_class/parameters/path + +You would echo into it your custom path and firmware requested will be searched +for there first. Be aware that newline characters will be taken into account +and may not produce the intended effects. For instance you might want to use: + +echo -n /path/to/script > /sys/module/firmware_class/parameters/path + +to ensure that your script is being used. diff --git a/Documentation/driver-api/firmware/fw_upload.rst b/Documentation/driver-api/firmware/fw_upload.rst new file mode 100644 index 000000000..edf1d0c5e --- /dev/null +++ b/Documentation/driver-api/firmware/fw_upload.rst @@ -0,0 +1,127 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +Firmware Upload API +=================== + +A device driver that registers with the firmware loader will expose +persistent sysfs nodes to enable users to initiate firmware updates for +that device. It is the responsibility of the device driver and/or the +device itself to perform any validation on the data received. Firmware +upload uses the same *loading* and *data* sysfs files described in the +documentation for firmware fallback. It also adds additional sysfs files +to provide status on the transfer of the firmware image to the device. + +Register for firmware upload +============================ + +A device driver registers for firmware upload by calling +firmware_upload_register(). Among the parameter list is a name to +identify the device under /sys/class/firmware. A user may initiate a +firmware upload by echoing a 1 to the *loading* sysfs file for the target +device. Next, the user writes the firmware image to the *data* sysfs +file. After writing the firmware data, the user echos 0 to the *loading* +sysfs file to signal completion. Echoing 0 to *loading* also triggers the +transfer of the firmware to the lower-lever device driver in the context +of a kernel worker thread. + +To use the firmware upload API, write a driver that implements a set of +ops. The probe function calls firmware_upload_register() and the remove +function calls firmware_upload_unregister() such as:: + + static const struct fw_upload_ops m10bmc_ops = { + .prepare = m10bmc_sec_prepare, + .write = m10bmc_sec_write, + .poll_complete = m10bmc_sec_poll_complete, + .cancel = m10bmc_sec_cancel, + .cleanup = m10bmc_sec_cleanup, + }; + + static int m10bmc_sec_probe(struct platform_device *pdev) + { + const char *fw_name, *truncate; + struct m10bmc_sec *sec; + struct fw_upload *fwl; + unsigned int len; + + sec = devm_kzalloc(&pdev->dev, sizeof(*sec), GFP_KERNEL); + if (!sec) + return -ENOMEM; + + sec->dev = &pdev->dev; + sec->m10bmc = dev_get_drvdata(pdev->dev.parent); + dev_set_drvdata(&pdev->dev, sec); + + fw_name = dev_name(sec->dev); + truncate = strstr(fw_name, ".auto"); + len = (truncate) ? truncate - fw_name : strlen(fw_name); + sec->fw_name = kmemdup_nul(fw_name, len, GFP_KERNEL); + + fwl = firmware_upload_register(THIS_MODULE, sec->dev, sec->fw_name, + &m10bmc_ops, sec); + if (IS_ERR(fwl)) { + dev_err(sec->dev, "Firmware Upload driver failed to start\n"); + kfree(sec->fw_name); + return PTR_ERR(fwl); + } + + sec->fwl = fwl; + return 0; + } + + static int m10bmc_sec_remove(struct platform_device *pdev) + { + struct m10bmc_sec *sec = dev_get_drvdata(&pdev->dev); + + firmware_upload_unregister(sec->fwl); + kfree(sec->fw_name); + return 0; + } + +firmware_upload_register +------------------------ +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.c + :identifiers: firmware_upload_register + +firmware_upload_unregister +-------------------------- +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.c + :identifiers: firmware_upload_unregister + +Firmware Upload Ops +------------------- +.. kernel-doc:: include/linux/firmware.h + :identifiers: fw_upload_ops + +Firmware Upload Progress Codes +------------------------------ +The following progress codes are used internally by the firmware loader. +Corresponding strings are reported through the status sysfs node that +is described below and are documented in the ABI documentation. + +.. kernel-doc:: drivers/base/firmware_loader/sysfs_upload.h + :identifiers: fw_upload_prog + +Firmware Upload Error Codes +--------------------------- +The following error codes may be returned by the driver ops in case of +failure: + +.. kernel-doc:: include/linux/firmware.h + :identifiers: fw_upload_err + +Sysfs Attributes +================ + +In addition to the *loading* and *data* sysfs files, there are additional +sysfs files to monitor the status of the data transfer to the target +device and to determine the final pass/fail status of the transfer. +Depending on the device and the size of the firmware image, a firmware +update could take milliseconds or minutes. + +The additional sysfs files are: + +* status - provides an indication of the progress of a firmware update +* error - provides error information for a failed firmware update +* remaining_size - tracks the data transfer portion of an update +* cancel - echo 1 to this file to cancel the update diff --git a/Documentation/driver-api/firmware/index.rst b/Documentation/driver-api/firmware/index.rst new file mode 100644 index 000000000..9d2c19dc8 --- /dev/null +++ b/Documentation/driver-api/firmware/index.rst @@ -0,0 +1,19 @@ +================== +Linux Firmware API +================== + +.. toctree:: + + introduction + core + efi/index + request_firmware + fw_upload + other_interfaces + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/firmware/introduction.rst b/Documentation/driver-api/firmware/introduction.rst new file mode 100644 index 000000000..211cb44eb --- /dev/null +++ b/Documentation/driver-api/firmware/introduction.rst @@ -0,0 +1,27 @@ +============ +Introduction +============ + +The firmware API enables kernel code to request files required +for functionality from userspace, the uses vary: + +* Microcode for CPU errata +* Device driver firmware, required to be loaded onto device + microcontrollers +* Device driver information data (calibration data, EEPROM overrides), + some of which can be completely optional. + +Types of firmware requests +========================== + +There are two types of calls: + +* Synchronous +* Asynchronous + +Which one you use vary depending on your requirements, the rule of thumb +however is you should strive to use the asynchronous APIs unless you also +are already using asynchronous initialization mechanisms which will not +stall or delay boot. Even if loading firmware does not take a lot of time +processing firmware might, and this can still delay boot or initialization, +as such mechanisms such as asynchronous probe can help supplement drivers. diff --git a/Documentation/driver-api/firmware/lookup-order.rst b/Documentation/driver-api/firmware/lookup-order.rst new file mode 100644 index 000000000..6064672a7 --- /dev/null +++ b/Documentation/driver-api/firmware/lookup-order.rst @@ -0,0 +1,20 @@ +===================== +Firmware lookup order +===================== + +Different functionality is available to enable firmware to be found. +Below is chronological order of how firmware will be looked for once +a driver issues a firmware API call. + +* The ''Built-in firmware'' is checked first, if the firmware is present we + return it immediately +* The ''Firmware cache'' is looked at next. If the firmware is found we + return it immediately +* The ''Direct filesystem lookup'' is performed next, if found we + return it immediately +* The ''Platform firmware fallback'' is performed next, but only when + firmware_request_platform() is used, if found we return it immediately +* If no firmware has been found and the fallback mechanism was enabled + the sysfs interface is created. After this either a kobject uevent + is issued or the custom firmware loading is relied upon for firmware + loading up to the timeout value. diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst new file mode 100644 index 000000000..06ac89ada --- /dev/null +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -0,0 +1,51 @@ +Other Firmware Interfaces +========================= + +DMI Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/dmi_scan.c + :export: + +EDD Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/edd.c + :internal: + +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + +Intel Stratix10 SoC Service Layer +--------------------------------- +Some features of the Intel Stratix10 SoC require a level of privilege +higher than the kernel is granted. Such secure features include +FPGA programming. In terms of the ARMv8 architecture, the kernel runs +at Exception Level 1 (EL1), access to the features requires +Exception Level 3 (EL3). + +The Intel Stratix10 SoC service layer provides an in kernel API for +drivers to request access to the secure features. The requests are queued +and processed one by one. ARM’s SMCCC is used to pass the execution +of the requests on to a secure monitor (EL3). + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_command_code + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_client_msg + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_command_config_type + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_cb_data + +.. kernel-doc:: include/linux/firmware/intel/stratix10-svc-client.h + :functions: stratix10_svc_client + +.. kernel-doc:: drivers/firmware/stratix10-svc.c + :export: diff --git a/Documentation/driver-api/firmware/request_firmware.rst b/Documentation/driver-api/firmware/request_firmware.rst new file mode 100644 index 000000000..0d6ea0329 --- /dev/null +++ b/Documentation/driver-api/firmware/request_firmware.rst @@ -0,0 +1,80 @@ +==================== +request_firmware API +==================== + +You would typically load firmware and then load it into your device somehow. +The typical firmware work flow is reflected below:: + + if(request_firmware(&fw_entry, $FIRMWARE, device) == 0) + copy_fw_to_device(fw_entry->data, fw_entry->size); + release_firmware(fw_entry); + +Synchronous firmware requests +============================= + +Synchronous firmware requests will wait until the firmware is found or until +an error is returned. + +request_firmware +---------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware + +firmware_request_nowarn +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_nowarn + +firmware_request_platform +------------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_platform + +request_firmware_direct +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_direct + +request_firmware_into_buf +------------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_into_buf + +Asynchronous firmware requests +============================== + +Asynchronous firmware requests allow driver code to not have to wait +until the firmware or an error is returned. Function callbacks are +provided so that when the firmware or an error is found the driver is +informed through the callback. request_firmware_nowait() cannot be called +in atomic contexts. + +request_firmware_nowait +----------------------- +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: request_firmware_nowait + +Special optimizations on reboot +=============================== + +Some devices have an optimization in place to enable the firmware to be +retained during system reboot. When such optimizations are used the driver +author must ensure the firmware is still available on resume from suspend, +this can be done with firmware_request_cache() instead of requesting for the +firmware to be loaded. + +firmware_request_cache() +------------------------ +.. kernel-doc:: drivers/base/firmware_loader/main.c + :functions: firmware_request_cache + +request firmware API expected driver use +======================================== + +Once an API call returns you process the firmware and then release the +firmware. For example if you used request_firmware() and it returns, +the driver has the firmware image accessible in fw_entry->{data,size}. +If something went wrong request_firmware() returns non-zero and fw_entry +is set to NULL. Once your driver is done with processing the firmware it +can call release_firmware(fw_entry) to release the firmware image +and any related resource. diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst new file mode 100644 index 000000000..604208534 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-bridge.rst @@ -0,0 +1,22 @@ +FPGA Bridge +=========== + +API to implement a new FPGA bridge +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* struct fpga_bridge - The FPGA Bridge structure +* struct fpga_bridge_ops - Low level Bridge driver ops +* fpga_bridge_register() - Create and register a bridge +* fpga_bridge_unregister() - Unregister a bridge + +.. kernel-doc:: include/linux/fpga/fpga-bridge.h + :functions: fpga_bridge + +.. kernel-doc:: include/linux/fpga/fpga-bridge.h + :functions: fpga_bridge_ops + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_register + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_unregister diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst new file mode 100644 index 000000000..49c0a9512 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-mgr.rst @@ -0,0 +1,162 @@ +FPGA Manager +============ + +Overview +-------- + +The FPGA manager core exports a set of functions for programming an FPGA with +an image. The API is manufacturer agnostic. All manufacturer specifics are +hidden away in a low level driver which registers a set of ops with the core. +The FPGA image data itself is very manufacturer specific, but for our purposes +it's just binary data. The FPGA manager core won't parse it. + +The FPGA image to be programmed can be in a scatter gather list, a single +contiguous buffer, or a firmware file. Because allocating contiguous kernel +memory for the buffer should be avoided, users are encouraged to use a scatter +gather list instead if possible. + +The particulars for programming the image are presented in a structure (struct +fpga_image_info). This struct contains parameters such as pointers to the +FPGA image as well as image-specific particulars such as whether the image was +built for full or partial reconfiguration. + +How to support a new FPGA device +-------------------------------- + +To add another FPGA manager, write a driver that implements a set of ops. The +probe function calls fpga_mgr_register() or fpga_mgr_register_full(), such as:: + + static const struct fpga_manager_ops socfpga_fpga_ops = { + .write_init = socfpga_fpga_ops_configure_init, + .write = socfpga_fpga_ops_configure_write, + .write_complete = socfpga_fpga_ops_configure_complete, + .state = socfpga_fpga_ops_state, + }; + + static int socfpga_fpga_probe(struct platform_device *pdev) + { + struct device *dev = &pdev->dev; + struct socfpga_fpga_priv *priv; + struct fpga_manager *mgr; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + /* + * do ioremaps, get interrupts, etc. and save + * them in priv + */ + + mgr = fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager", + &socfpga_fpga_ops, priv); + if (IS_ERR(mgr)) + return PTR_ERR(mgr); + + platform_set_drvdata(pdev, mgr); + + return 0; + } + + static int socfpga_fpga_remove(struct platform_device *pdev) + { + struct fpga_manager *mgr = platform_get_drvdata(pdev); + + fpga_mgr_unregister(mgr); + + return 0; + } + +Alternatively, the probe function could call one of the resource managed +register functions, devm_fpga_mgr_register() or devm_fpga_mgr_register_full(). +When these functions are used, the parameter syntax is the same, but the call +to fpga_mgr_unregister() should be removed. In the above example, the +socfpga_fpga_remove() function would not be required. + +The ops will implement whatever device specific register writes are needed to +do the programming sequence for this particular FPGA. These ops return 0 for +success or negative error codes otherwise. + +The programming sequence is:: + 1. .parse_header (optional, may be called once or multiple times) + 2. .write_init + 3. .write or .write_sg (may be called once or multiple times) + 4. .write_complete + +The .parse_header function will set header_size and data_size to +struct fpga_image_info. Before parse_header call, header_size is initialized +with initial_header_size. If flag skip_header of fpga_manager_ops is true, +.write function will get image buffer starting at header_size offset from the +beginning. If data_size is set, .write function will get data_size bytes of +the image buffer, otherwise .write will get data up to the end of image buffer. +This will not affect .write_sg, .write_sg will still get whole image in +sg_table form. If FPGA image is already mapped as a single contiguous buffer, +whole buffer will be passed into .parse_header. If image is in scatter-gather +form, core code will buffer up at least .initial_header_size before the first +call of .parse_header, if it is not enough, .parse_header should set desired +size into info->header_size and return -EAGAIN, then it will be called again +with greater part of image buffer on the input. + +The .write_init function will prepare the FPGA to receive the image data. The +buffer passed into .write_init will be at least info->header_size bytes long; +if the whole bitstream is not immediately available then the core code will +buffer up at least this much before starting. + +The .write function writes a buffer to the FPGA. The buffer may be contain the +whole FPGA image or may be a smaller chunk of an FPGA image. In the latter +case, this function is called multiple times for successive chunks. This interface +is suitable for drivers which use PIO. + +The .write_sg version behaves the same as .write except the input is a sg_table +scatter list. This interface is suitable for drivers which use DMA. + +The .write_complete function is called after all the image has been written +to put the FPGA into operating mode. + +The ops include a .state function which will determine the state the FPGA is in +and return a code of type enum fpga_mgr_states. It doesn't result in a change +in state. + +API for implementing a new FPGA Manager driver +---------------------------------------------- + +* ``fpga_mgr_states`` - Values for :c:expr:`fpga_manager->state`. +* struct fpga_manager - the FPGA manager struct +* struct fpga_manager_ops - Low level FPGA manager driver ops +* struct fpga_manager_info - Parameter structure for fpga_mgr_register_full() +* fpga_mgr_register_full() - Create and register an FPGA manager using the + fpga_mgr_info structure to provide the full flexibility of options +* fpga_mgr_register() - Create and register an FPGA manager using standard + arguments +* devm_fpga_mgr_register_full() - Resource managed version of + fpga_mgr_register_full() +* devm_fpga_mgr_register() - Resource managed version of fpga_mgr_register() +* fpga_mgr_unregister() - Unregister an FPGA manager + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_mgr_states + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager_ops + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_manager_info + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_register_full + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_register + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: devm_fpga_mgr_register_full + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: devm_fpga_mgr_register + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_unregister diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst new file mode 100644 index 000000000..fb4da4240 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-programming.rst @@ -0,0 +1,107 @@ +In-kernel API for FPGA Programming +================================== + +Overview +-------- + +The in-kernel API for FPGA programming is a combination of APIs from +FPGA manager, bridge, and regions. The actual function used to +trigger FPGA programming is fpga_region_program_fpga(). + +fpga_region_program_fpga() uses functionality supplied by +the FPGA manager and bridges. It will: + + * lock the region's mutex + * lock the mutex of the region's FPGA manager + * build a list of FPGA bridges if a method has been specified to do so + * disable the bridges + * program the FPGA using info passed in :c:expr:`fpga_region->info`. + * re-enable the bridges + * release the locks + +The struct fpga_image_info specifies what FPGA image to program. It is +allocated/freed by fpga_image_info_alloc() and freed with +fpga_image_info_free() + +How to program an FPGA using a region +------------------------------------- + +When the FPGA region driver probed, it was given a pointer to an FPGA manager +driver so it knows which manager to use. The region also either has a list of +bridges to control during programming or it has a pointer to a function that +will generate that list. Here's some sample code of what to do next:: + + #include + #include + + struct fpga_image_info *info; + int ret; + + /* + * First, alloc the struct with information about the FPGA image to + * program. + */ + info = fpga_image_info_alloc(dev); + if (!info) + return -ENOMEM; + + /* Set flags as needed, such as: */ + info->flags = FPGA_MGR_PARTIAL_RECONFIG; + + /* + * Indicate where the FPGA image is. This is pseudo-code; you're + * going to use one of these three. + */ + if (image is in a scatter gather table) { + + info->sgt = [your scatter gather table] + + } else if (image is in a buffer) { + + info->buf = [your image buffer] + info->count = [image buffer size] + + } else if (image is in a firmware file) { + + info->firmware_name = devm_kstrdup(dev, firmware_name, + GFP_KERNEL); + + } + + /* Add info to region and do the programming */ + region->info = info; + ret = fpga_region_program_fpga(region); + + /* Deallocate the image info if you're done with it */ + region->info = NULL; + fpga_image_info_free(info); + + if (ret) + return ret; + + /* Now enumerate whatever hardware has appeared in the FPGA. */ + +API for programming an FPGA +--------------------------- + +* fpga_region_program_fpga() - Program an FPGA +* fpga_image_info() - Specifies what FPGA image to program +* fpga_image_info_alloc() - Allocate an FPGA image info struct +* fpga_image_info_free() - Free an FPGA image info struct + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_program_fpga + +FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :doc: FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :functions: fpga_image_info + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_image_info_alloc + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_image_info_free diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst new file mode 100644 index 000000000..dc55d60a0 --- /dev/null +++ b/Documentation/driver-api/fpga/fpga-region.rst @@ -0,0 +1,109 @@ +FPGA Region +=========== + +Overview +-------- + +This document is meant to be a brief overview of the FPGA region API usage. A +more conceptual look at regions can be found in the Device Tree binding +document [#f1]_. + +For the purposes of this API document, let's just say that a region associates +an FPGA Manager and a bridge (or bridges) with a reprogrammable region of an +FPGA or the whole FPGA. The API provides a way to register a region and to +program a region. + +Currently the only layer above fpga-region.c in the kernel is the Device Tree +support (of-fpga-region.c) described in [#f1]_. The DT support layer uses regions +to program the FPGA and then DT to handle enumeration. The common region code +is intended to be used by other schemes that have other ways of accomplishing +enumeration after programming. + +An fpga-region can be set up to know the following things: + + * which FPGA manager to use to do the programming + + * which bridges to disable before programming and enable afterwards. + +Additional info needed to program the FPGA image is passed in the struct +fpga_image_info including: + + * pointers to the image as either a scatter-gather buffer, a contiguous + buffer, or the name of firmware file + + * flags indicating specifics such as whether the image is for partial + reconfiguration. + +How to add a new FPGA region +---------------------------- + +An example of usage can be seen in the probe function of [#f2]_. + +.. [#f1] ../devicetree/bindings/fpga/fpga-region.txt +.. [#f2] ../../drivers/fpga/of-fpga-region.c + +API to add a new FPGA region +---------------------------- + +* struct fpga_region - The FPGA region struct +* struct fpga_region_info - Parameter structure for fpga_region_register_full() +* fpga_region_register_full() - Create and register an FPGA region using the + fpga_region_info structure to provide the full flexibility of options +* fpga_region_register() - Create and register an FPGA region using standard + arguments +* fpga_region_unregister() - Unregister an FPGA region + +The FPGA region's probe function will need to get a reference to the FPGA +Manager it will be using to do the programming. This usually would happen +during the region's probe function. + +* fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count +* of_fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count, + given a device node. +* fpga_mgr_put() - Put an FPGA manager + +The FPGA region will need to specify which bridges to control while programming +the FPGA. The region driver can build a list of bridges during probe time +(:c:expr:`fpga_region->bridge_list`) or it can have a function that creates +the list of bridges to program just before programming +(:c:expr:`fpga_region->get_bridges`). The FPGA bridge framework supplies the +following APIs to handle building or tearing down that list. + +* fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a + list +* of_fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a + list, given a device node +* fpga_bridges_put() - Given a list of bridges, put them + +.. kernel-doc:: include/linux/fpga/fpga-region.h + :functions: fpga_region + +.. kernel-doc:: include/linux/fpga/fpga-region.h + :functions: fpga_region_info + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_register_full + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_register + +.. kernel-doc:: drivers/fpga/fpga-region.c + :functions: fpga_region_unregister + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_get + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: of_fpga_mgr_get + +.. kernel-doc:: drivers/fpga/fpga-mgr.c + :functions: fpga_mgr_put + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridge_get_to_list + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: of_fpga_bridge_get_to_list + +.. kernel-doc:: drivers/fpga/fpga-bridge.c + :functions: fpga_bridges_put diff --git a/Documentation/driver-api/fpga/index.rst b/Documentation/driver-api/fpga/index.rst new file mode 100644 index 000000000..31a4773bd --- /dev/null +++ b/Documentation/driver-api/fpga/index.rst @@ -0,0 +1,15 @@ +============== +FPGA Subsystem +============== + +:Author: Alan Tull + +.. toctree:: + :maxdepth: 2 + + intro + fpga-mgr + fpga-bridge + fpga-region + fpga-programming + diff --git a/Documentation/driver-api/fpga/intro.rst b/Documentation/driver-api/fpga/intro.rst new file mode 100644 index 000000000..f54c7dabc --- /dev/null +++ b/Documentation/driver-api/fpga/intro.rst @@ -0,0 +1,54 @@ +Introduction +============ + +The FPGA subsystem supports reprogramming FPGAs dynamically under +Linux. Some of the core intentions of the FPGA subsystems are: + +* The FPGA subsystem is vendor agnostic. + +* The FPGA subsystem separates upper layers (userspace interfaces and + enumeration) from lower layers that know how to program a specific + FPGA. + +* Code should not be shared between upper and lower layers. This + should go without saying. If that seems necessary, there's probably + framework functionality that can be added that will benefit + other users. Write the linux-fpga mailing list and maintainers and + seek out a solution that expands the framework for broad reuse. + +* Generally, when adding code, think of the future. Plan for reuse. + +The framework in the kernel is divided into: + +FPGA Manager +------------ + +If you are adding a new FPGA or a new method of programming an FPGA, +this is the subsystem for you. Low level FPGA manager drivers contain +the knowledge of how to program a specific device. This subsystem +includes the framework in fpga-mgr.c and the low level drivers that +are registered with it. + +FPGA Bridge +----------- + +FPGA Bridges prevent spurious signals from going out of an FPGA or a +region of an FPGA during programming. They are disabled before +programming begins and re-enabled afterwards. An FPGA bridge may be +actual hard hardware that gates a bus to a CPU or a soft ("freeze") +bridge in FPGA fabric that surrounds a partial reconfiguration region +of an FPGA. This subsystem includes fpga-bridge.c and the low level +drivers that are registered with it. + +FPGA Region +----------- + +If you are adding a new interface to the FPGA framework, add it on top +of an FPGA region. + +The FPGA Region framework (fpga-region.c) associates managers and +bridges as reconfigurable regions. A region may refer to the whole +FPGA in full reconfiguration or to a partial reconfiguration region. + +The Device Tree FPGA Region support (of-fpga-region.c) handles +reprogramming FPGAs when device tree overlays are applied. diff --git a/Documentation/driver-api/frame-buffer.rst b/Documentation/driver-api/frame-buffer.rst new file mode 100644 index 000000000..9dd3060f0 --- /dev/null +++ b/Documentation/driver-api/frame-buffer.rst @@ -0,0 +1,62 @@ +Frame Buffer Library +==================== + +The frame buffer drivers depend heavily on four data structures. These +structures are declared in include/linux/fb.h. They are fb_info, +fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs. The last +three can be made available to and from userland. + +fb_info defines the current state of a particular video card. Inside +fb_info, there exists a fb_ops structure which is a collection of +needed functions to make fbdev and fbcon work. fb_info is only visible +to the kernel. + +fb_var_screeninfo is used to describe the features of a video card +that are user defined. With fb_var_screeninfo, things such as depth +and the resolution may be defined. + +The next structure is fb_fix_screeninfo. This defines the properties +of a card that are created when a mode is set and can't be changed +otherwise. A good example of this is the start of the frame buffer +memory. This "locks" the address of the frame buffer memory, so that it +cannot be changed or moved. + +The last structure is fb_monospecs. In the old API, there was little +importance for fb_monospecs. This allowed for forbidden things such as +setting a mode of 800x600 on a fix frequency monitor. With the new API, +fb_monospecs prevents such things, and if used correctly, can prevent a +monitor from being cooked. fb_monospecs will not be useful until +kernels 2.5.x. + +Frame Buffer Memory +------------------- + +.. kernel-doc:: drivers/video/fbdev/core/fbmem.c + :export: + +Frame Buffer Colormap +--------------------- + +.. kernel-doc:: drivers/video/fbdev/core/fbcmap.c + :export: + +Frame Buffer Video Mode Database +-------------------------------- + +.. kernel-doc:: drivers/video/fbdev/core/modedb.c + :internal: + +.. kernel-doc:: drivers/video/fbdev/core/modedb.c + :export: + +Frame Buffer Macintosh Video Mode Database +------------------------------------------ + +.. kernel-doc:: drivers/video/fbdev/macmodes.c + :export: + +Frame Buffer Fonts +------------------ + +Refer to the file lib/fonts/fonts.c for more information. + diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst new file mode 100644 index 000000000..71ccc30e5 --- /dev/null +++ b/Documentation/driver-api/generic-counter.rst @@ -0,0 +1,573 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +Generic Counter Interface +========================= + +Introduction +============ + +Counter devices are prevalent among a diverse spectrum of industries. +The ubiquitous presence of these devices necessitates a common interface +and standard of interaction and exposure. This driver API attempts to +resolve the issue of duplicate code found among existing counter device +drivers by introducing a generic counter interface for consumption. The +Generic Counter interface enables drivers to support and expose a common +set of components and functionality present in counter devices. + +Theory +====== + +Counter devices can vary greatly in design, but regardless of whether +some devices are quadrature encoder counters or tally counters, all +counter devices consist of a core set of components. This core set of +components, shared by all counter devices, is what forms the essence of +the Generic Counter interface. + +There are three core components to a counter: + +* Signal: + Stream of data to be evaluated by the counter. + +* Synapse: + Association of a Signal, and evaluation trigger, with a Count. + +* Count: + Accumulation of the effects of connected Synapses. + +SIGNAL +------ +A Signal represents a stream of data. This is the input data that is +evaluated by the counter to determine the count data; e.g. a quadrature +signal output line of a rotary encoder. Not all counter devices provide +user access to the Signal data, so exposure is optional for drivers. + +When the Signal data is available for user access, the Generic Counter +interface provides the following available signal values: + +* SIGNAL_LOW: + Signal line is in a low state. + +* SIGNAL_HIGH: + Signal line is in a high state. + +A Signal may be associated with one or more Counts. + +SYNAPSE +------- +A Synapse represents the association of a Signal with a Count. Signal +data affects respective Count data, and the Synapse represents this +relationship. + +The Synapse action mode specifies the Signal data condition that +triggers the respective Count's count function evaluation to update the +count data. The Generic Counter interface provides the following +available action modes: + +* None: + Signal does not trigger the count function. In Pulse-Direction count + function mode, this Signal is evaluated as Direction. + +* Rising Edge: + Low state transitions to high state. + +* Falling Edge: + High state transitions to low state. + +* Both Edges: + Any state transition. + +A counter is defined as a set of input signals associated with count +data that are generated by the evaluation of the state of the associated +input signals as defined by the respective count functions. Within the +context of the Generic Counter interface, a counter consists of Counts +each associated with a set of Signals, whose respective Synapse +instances represent the count function update conditions for the +associated Counts. + +A Synapse associates one Signal with one Count. + +COUNT +----- +A Count represents the accumulation of the effects of connected +Synapses; i.e. the count data for a set of Signals. The Generic +Counter interface represents the count data as a natural number. + +A Count has a count function mode which represents the update behavior +for the count data. The Generic Counter interface provides the following +available count function modes: + +* Increase: + Accumulated count is incremented. + +* Decrease: + Accumulated count is decremented. + +* Pulse-Direction: + Rising edges on signal A updates the respective count. The input level + of signal B determines direction. + +* Quadrature: + A pair of quadrature encoding signals are evaluated to determine + position and direction. The following Quadrature modes are available: + + - x1 A: + If direction is forward, rising edges on quadrature pair signal A + updates the respective count; if the direction is backward, falling + edges on quadrature pair signal A updates the respective count. + Quadrature encoding determines the direction. + + - x1 B: + If direction is forward, rising edges on quadrature pair signal B + updates the respective count; if the direction is backward, falling + edges on quadrature pair signal B updates the respective count. + Quadrature encoding determines the direction. + + - x2 A: + Any state transition on quadrature pair signal A updates the + respective count. Quadrature encoding determines the direction. + + - x2 B: + Any state transition on quadrature pair signal B updates the + respective count. Quadrature encoding determines the direction. + + - x4: + Any state transition on either quadrature pair signals updates the + respective count. Quadrature encoding determines the direction. + +A Count has a set of one or more associated Synapses. + +Paradigm +======== + +The most basic counter device may be expressed as a single Count +associated with a single Signal via a single Synapse. Take for example +a counter device which simply accumulates a count of rising edges on a +source input line:: + + Count Synapse Signal + ----- ------- ------ + +---------------------+ + | Data: Count | Rising Edge ________ + | Function: Increase | <------------- / Source \ + | | ____________ + +---------------------+ + +In this example, the Signal is a source input line with a pulsing +voltage, while the Count is a persistent count value which is repeatedly +incremented. The Signal is associated with the respective Count via a +Synapse. The increase function is triggered by the Signal data condition +specified by the Synapse -- in this case a rising edge condition on the +voltage input line. In summary, the counter device existence and +behavior is aptly represented by respective Count, Signal, and Synapse +components: a rising edge condition triggers an increase function on an +accumulating count datum. + +A counter device is not limited to a single Signal; in fact, in theory +many Signals may be associated with even a single Count. For example, a +quadrature encoder counter device can keep track of position based on +the states of two input lines:: + + Count Synapse Signal + ----- ------- ------ + +-------------------------+ + | Data: Position | Both Edges ___ + | Function: Quadrature x4 | <------------ / A \ + | | _______ + | | + | | Both Edges ___ + | | <------------ / B \ + | | _______ + +-------------------------+ + +In this example, two Signals (quadrature encoder lines A and B) are +associated with a single Count: a rising or falling edge on either A or +B triggers the "Quadrature x4" function which determines the direction +of movement and updates the respective position data. The "Quadrature +x4" function is likely implemented in the hardware of the quadrature +encoder counter device; the Count, Signals, and Synapses simply +represent this hardware behavior and functionality. + +Signals associated with the same Count can have differing Synapse action +mode conditions. For example, a quadrature encoder counter device +operating in a non-quadrature Pulse-Direction mode could have one input +line dedicated for movement and a second input line dedicated for +direction:: + + Count Synapse Signal + ----- ------- ------ + +---------------------------+ + | Data: Position | Rising Edge ___ + | Function: Pulse-Direction | <------------- / A \ (Movement) + | | _______ + | | + | | None ___ + | | <------------- / B \ (Direction) + | | _______ + +---------------------------+ + +Only Signal A triggers the "Pulse-Direction" update function, but the +instantaneous state of Signal B is still required in order to know the +direction so that the position data may be properly updated. Ultimately, +both Signals are associated with the same Count via two respective +Synapses, but only one Synapse has an active action mode condition which +triggers the respective count function while the other is left with a +"None" condition action mode to indicate its respective Signal's +availability for state evaluation despite its non-triggering mode. + +Keep in mind that the Signal, Synapse, and Count are abstract +representations which do not need to be closely married to their +respective physical sources. This allows the user of a counter to +divorce themselves from the nuances of physical components (such as +whether an input line is differential or single-ended) and instead focus +on the core idea of what the data and process represent (e.g. position +as interpreted from quadrature encoding data). + +Driver API +========== + +Driver authors may utilize the Generic Counter interface in their code +by including the include/linux/counter.h header file. This header file +provides several core data structures, function prototypes, and macros +for defining a counter device. + +.. kernel-doc:: include/linux/counter.h + :internal: + +.. kernel-doc:: drivers/counter/counter-core.c + :export: + +.. kernel-doc:: drivers/counter/counter-chrdev.c + :export: + +Driver Implementation +===================== + +To support a counter device, a driver must first allocate the available +Counter Signals via counter_signal structures. These Signals should +be stored as an array and set to the signals array member of an +allocated counter_device structure before the Counter is registered to +the system. + +Counter Counts may be allocated via counter_count structures, and +respective Counter Signal associations (Synapses) made via +counter_synapse structures. Associated counter_synapse structures are +stored as an array and set to the synapses array member of the +respective counter_count structure. These counter_count structures are +set to the counts array member of an allocated counter_device structure +before the Counter is registered to the system. + +Driver callbacks must be provided to the counter_device structure in +order to communicate with the device: to read and write various Signals +and Counts, and to set and get the "action mode" and "function mode" for +various Synapses and Counts respectively. + +A counter_device structure is allocated using counter_alloc() and then +registered to the system by passing it to the counter_add() function, and +unregistered by passing it to the counter_unregister function. There are +device managed variants of these functions: devm_counter_alloc() and +devm_counter_add(). + +The struct counter_comp structure is used to define counter extensions +for Signals, Synapses, and Counts. + +The "type" member specifies the type of high-level data (e.g. BOOL, +COUNT_DIRECTION, etc.) handled by this extension. The "``*_read``" and +"``*_write``" members can then be set by the counter device driver with +callbacks to handle that data using native C data types (i.e. u8, u64, +etc.). + +Convenience macros such as ``COUNTER_COMP_COUNT_U64`` are provided for +use by driver authors. In particular, driver authors are expected to use +the provided macros for standard Counter subsystem attributes in order +to maintain a consistent interface for userspace. For example, a counter +device driver may define several standard attributes like so:: + + struct counter_comp count_ext[] = { + COUNTER_COMP_DIRECTION(count_direction_read), + COUNTER_COMP_ENABLE(count_enable_read, count_enable_write), + COUNTER_COMP_CEILING(count_ceiling_read, count_ceiling_write), + }; + +This makes it simple to see, add, and modify the attributes that are +supported by this driver ("direction", "enable", and "ceiling") and to +maintain this code without getting lost in a web of struct braces. + +Callbacks must match the function type expected for the respective +component or extension. These function types are defined in the struct +counter_comp structure as the "``*_read``" and "``*_write``" union +members. + +The corresponding callback prototypes for the extensions mentioned in +the previous example above would be:: + + int count_direction_read(struct counter_device *counter, + struct counter_count *count, + enum counter_count_direction *direction); + int count_enable_read(struct counter_device *counter, + struct counter_count *count, u8 *enable); + int count_enable_write(struct counter_device *counter, + struct counter_count *count, u8 enable); + int count_ceiling_read(struct counter_device *counter, + struct counter_count *count, u64 *ceiling); + int count_ceiling_write(struct counter_device *counter, + struct counter_count *count, u64 ceiling); + +Determining the type of extension to create is a matter of scope. + +* Signal extensions are attributes that expose information/control + specific to a Signal. These types of attributes will exist under a + Signal's directory in sysfs. + + For example, if you have an invert feature for a Signal, you can have + a Signal extension called "invert" that toggles that feature: + /sys/bus/counter/devices/counterX/signalY/invert + +* Count extensions are attributes that expose information/control + specific to a Count. These type of attributes will exist under a + Count's directory in sysfs. + + For example, if you want to pause/unpause a Count from updating, you + can have a Count extension called "enable" that toggles such: + /sys/bus/counter/devices/counterX/countY/enable + +* Device extensions are attributes that expose information/control + non-specific to a particular Count or Signal. This is where you would + put your global features or other miscellaneous functionality. + + For example, if your device has an overtemp sensor, you can report the + chip overheated via a device extension called "error_overtemp": + /sys/bus/counter/devices/counterX/error_overtemp + +Subsystem Architecture +====================== + +Counter drivers pass and take data natively (i.e. ``u8``, ``u64``, etc.) +and the shared counter module handles the translation between the sysfs +interface. This guarantees a standard userspace interface for all +counter drivers, and enables a Generic Counter chrdev interface via a +generalized device driver ABI. + +A high-level view of how a count value is passed down from a counter +driver is exemplified by the following. The driver callbacks are first +registered to the Counter core component for use by the Counter +userspace interface components:: + + Driver callbacks registration: + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +----------------------------+ + | Counter device driver | + +----------------------------+ + | Processes data from device | + +----------------------------+ + | + ------------------- + / driver callbacks / + ------------------- + | + V + +----------------------+ + | Counter core | + +----------------------+ + | Routes device driver | + | callbacks to the | + | userspace interfaces | + +----------------------+ + | + ------------------- + / driver callbacks / + ------------------- + | + +---------------+---------------+ + | | + V V + +--------------------+ +---------------------+ + | Counter sysfs | | Counter chrdev | + +--------------------+ +---------------------+ + | Translates to the | | Translates to the | + | standard Counter | | standard Counter | + | sysfs output | | character device | + +--------------------+ +---------------------+ + +Thereafter, data can be transferred directly between the Counter device +driver and Counter userspace interface:: + + Count data request: + ~~~~~~~~~~~~~~~~~~~ + ---------------------- + / Counter device \ + +----------------------+ + | Count register: 0x28 | + +----------------------+ + | + ----------------- + / raw count data / + ----------------- + | + V + +----------------------------+ + | Counter device driver | + +----------------------------+ + | Processes data from device | + |----------------------------| + | Type: u64 | + | Value: 42 | + +----------------------------+ + | + ---------- + / u64 / + ---------- + | + +---------------+---------------+ + | | + V V + +--------------------+ +---------------------+ + | Counter sysfs | | Counter chrdev | + +--------------------+ +---------------------+ + | Translates to the | | Translates to the | + | standard Counter | | standard Counter | + | sysfs output | | character device | + |--------------------| |---------------------| + | Type: const char * | | Type: u64 | + | Value: "42" | | Value: 42 | + +--------------------+ +---------------------+ + | | + --------------- ----------------------- + / const char * / / struct counter_event / + --------------- ----------------------- + | | + | V + | +-----------+ + | | read | + | +-----------+ + | \ Count: 42 / + | ----------- + | + V + +--------------------------------------------------+ + | `/sys/bus/counter/devices/counterX/countY/count` | + +--------------------------------------------------+ + \ Count: "42" / + -------------------------------------------------- + +There are four primary components involved: + +Counter device driver +--------------------- +Communicates with the hardware device to read/write data; e.g. counter +drivers for quadrature encoders, timers, etc. + +Counter core +------------ +Registers the counter device driver to the system so that the respective +callbacks are called during userspace interaction. + +Counter sysfs +------------- +Translates counter data to the standard Counter sysfs interface format +and vice versa. + +Please refer to the ``Documentation/ABI/testing/sysfs-bus-counter`` file +for a detailed breakdown of the available Generic Counter interface +sysfs attributes. + +Counter chrdev +-------------- +Translates Counter events to the standard Counter character device; data +is transferred via standard character device read calls, while Counter +events are configured via ioctl calls. + +Sysfs Interface +=============== + +Several sysfs attributes are generated by the Generic Counter interface, +and reside under the ``/sys/bus/counter/devices/counterX`` directory, +where ``X`` is to the respective counter device id. Please see +``Documentation/ABI/testing/sysfs-bus-counter`` for detailed information +on each Generic Counter interface sysfs attribute. + +Through these sysfs attributes, programs and scripts may interact with +the Generic Counter paradigm Counts, Signals, and Synapses of respective +counter devices. + +Counter Character Device +======================== + +Counter character device nodes are created under the ``/dev`` directory +as ``counterX``, where ``X`` is the respective counter device id. +Defines for the standard Counter data types are exposed via the +userspace ``include/uapi/linux/counter.h`` file. + +Counter events +-------------- +Counter device drivers can support Counter events by utilizing the +``counter_push_event`` function:: + + void counter_push_event(struct counter_device *const counter, const u8 event, + const u8 channel); + +The event id is specified by the ``event`` parameter; the event channel +id is specified by the ``channel`` parameter. When this function is +called, the Counter data associated with the respective event is +gathered, and a ``struct counter_event`` is generated for each datum and +pushed to userspace. + +Counter events can be configured by users to report various Counter +data of interest. This can be conceptualized as a list of Counter +component read calls to perform. For example: + + +------------------------+------------------------+ + | COUNTER_EVENT_OVERFLOW | COUNTER_EVENT_INDEX | + +========================+========================+ + | Channel 0 | Channel 0 | + +------------------------+------------------------+ + | * Count 0 | * Signal 0 | + | * Count 1 | * Signal 0 Extension 0 | + | * Signal 3 | * Extension 4 | + | * Count 4 Extension 2 +------------------------+ + | * Signal 5 Extension 0 | Channel 1 | + | +------------------------+ + | | * Signal 4 | + | | * Signal 4 Extension 0 | + | | * Count 7 | + +------------------------+------------------------+ + +When ``counter_push_event(counter, COUNTER_EVENT_INDEX, 1)`` is called +for example, it will go down the list for the ``COUNTER_EVENT_INDEX`` +event channel 1 and execute the read callbacks for Signal 4, Signal 4 +Extension 0, and Count 7 -- the data returned for each is pushed to a +kfifo as a ``struct counter_event``, which userspace can retrieve via a +standard read operation on the respective character device node. + +Userspace +--------- +Userspace applications can configure Counter events via ioctl operations +on the Counter character device node. There following ioctl codes are +supported and provided by the ``linux/counter.h`` userspace header file: + +* :c:macro:`COUNTER_ADD_WATCH_IOCTL` + +* :c:macro:`COUNTER_ENABLE_EVENTS_IOCTL` + +* :c:macro:`COUNTER_DISABLE_EVENTS_IOCTL` + +To configure events to gather Counter data, users first populate a +``struct counter_watch`` with the relevant event id, event channel id, +and the information for the desired Counter component from which to +read, and then pass it via the ``COUNTER_ADD_WATCH_IOCTL`` ioctl +command. + +Note that an event can be watched without gathering Counter data by +setting the ``component.type`` member equal to +``COUNTER_COMPONENT_NONE``. With this configuration the Counter +character device will simply populate the event timestamps for those +respective ``struct counter_event`` elements and ignore the component +value. + +The ``COUNTER_ADD_WATCH_IOCTL`` command will buffer these Counter +watches. When ready, the ``COUNTER_ENABLE_EVENTS_IOCTL`` ioctl command +may be used to activate these Counter watches. + +Userspace applications can then execute a ``read`` operation (optionally +calling ``poll`` first) on the Counter character device node to retrieve +``struct counter_event`` elements with the desired data. diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst new file mode 100644 index 000000000..b33aa04f2 --- /dev/null +++ b/Documentation/driver-api/gpio/board.rst @@ -0,0 +1,222 @@ +============= +GPIO Mappings +============= + +This document explains how GPIOs can be assigned to given devices and functions. + +Note that it only applies to the new descriptor-based interface. For a +description of the deprecated integer-based GPIO interface please refer to +legacy.rst (actually, there is no real mapping possible with the old +interface; you just fetch an integer from somewhere and request the +corresponding GPIO). + +All platforms can enable the GPIO library, but if the platform strictly +requires GPIO functionality to be present, it needs to select GPIOLIB from its +Kconfig. Then, how GPIOs are mapped depends on what the platform uses to +describe its hardware layout. Currently, mappings can be defined through device +tree, ACPI, and platform data. + +Device Tree +----------- +GPIOs can easily be mapped to devices and functions in the device tree. The +exact way to do it depends on the GPIO controller providing the GPIOs, see the +device tree bindings for your controller. + +GPIOs mappings are defined in the consumer device's node, in a property named +-gpios, where is the function the driver will request +through gpiod_get(). For example:: + + foo_device { + compatible = "acme,foo"; + ... + led-gpios = <&gpio 15 GPIO_ACTIVE_HIGH>, /* red */ + <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ + <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ + + power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + }; + +Properties named -gpio are also considered valid and old bindings use +it but are only supported for compatibility reasons and should not be used for +newer bindings since it has been deprecated. + +This property will make GPIOs 15, 16 and 17 available to the driver under the +"led" function, and GPIO 1 as the "power" GPIO:: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); + + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); + +The led GPIOs will be active high, while the power GPIO will be active low (i.e. +gpiod_is_active_low(power) will be true). + +The second parameter of the gpiod_get() functions, the con_id string, has to be +the -prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(``snprintf(... "%s-%s", con_id, gpio_suffixes[]``). + +ACPI +---- +ACPI also supports function names for GPIOs in a similar fashion to DT. +The above DT example can be converted to an equivalent ACPI description +with the help of _DSD (Device Specific Data), introduced in ACPI 5.1:: + + Device (FOO) { + Name (_CRS, ResourceTemplate () { + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 15 } // red + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 16 } // green + GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 17 } // blue + GpioIo (Exclusive, PullNone, 0, 0, IoRestrictionOutputOnly, + "\\_SB.GPI0", 0, ResourceConsumer) { 1 } // power + }) + + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { + "led-gpios", + Package () { + ^FOO, 0, 0, 1, + ^FOO, 1, 0, 1, + ^FOO, 2, 0, 1, + } + }, + Package () { "power-gpios", Package () { ^FOO, 3, 0, 0 } }, + } + }) + } + +For more information about the ACPI GPIO bindings see +Documentation/firmware-guide/acpi/gpio-properties.rst. + +Platform Data +------------- +Finally, GPIOs can be bound to devices and functions using platform data. Board +files that desire to do so need to include the following header:: + + #include + +GPIOs are mapped by the means of tables of lookups, containing instances of the +gpiod_lookup structure. Two macros are defined to help declaring such mappings:: + + GPIO_LOOKUP(key, chip_hwnum, con_id, flags) + GPIO_LOOKUP_IDX(key, chip_hwnum, con_id, idx, flags) + +where + + - key is either the label of the gpiod_chip instance providing the GPIO, or + the GPIO line name + - chip_hwnum is the hardware number of the GPIO within the chip, or U16_MAX + to indicate that key is a GPIO line name + - con_id is the name of the GPIO function from the device point of view. It + can be NULL, in which case it will match any function. + - idx is the index of the GPIO within the function. + - flags is defined to specify the following properties: + * GPIO_ACTIVE_HIGH - GPIO line is active high + * GPIO_ACTIVE_LOW - GPIO line is active low + * GPIO_OPEN_DRAIN - GPIO line is set up as open drain + * GPIO_OPEN_SOURCE - GPIO line is set up as open source + * GPIO_PERSISTENT - GPIO line is persistent during + suspend/resume and maintains its value + * GPIO_TRANSITORY - GPIO line is transitory and may loose its + electrical state during suspend/resume + +In the future, these flags might be extended to support more properties. + +Note that: + 1. GPIO line names are not guaranteed to be globally unique, so the first + match found will be used. + 2. GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0. + +A lookup table can then be defined as follows, with an empty entry defining its +end. The 'dev_id' field of the table is the identifier of the device that will +make use of these GPIOs. It can be NULL, in which case it will be matched for +calls to gpiod_get() with a NULL device. + +.. code-block:: c + + struct gpiod_lookup_table gpios_table = { + .dev_id = "foo.0", + .table = { + GPIO_LOOKUP_IDX("gpio.0", 15, "led", 0, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 16, "led", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio.0", 17, "led", 2, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio.0", 1, "power", GPIO_ACTIVE_LOW), + { }, + }, + }; + +And the table can be added by the board code as follows:: + + gpiod_add_lookup_table(&gpios_table); + +The driver controlling "foo.0" will then be able to obtain its GPIOs as follows:: + + struct gpio_desc *red, *green, *blue, *power; + + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); + + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); + +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the +legacy integer GPIO interface, the active-low property is handled during +mapping and is thus transparent to GPIO consumers. + +A set of functions such as gpiod_set_value() is available to work with +the new descriptor-oriented interface. + +Boards using platform data can also hog GPIO lines by defining GPIO hog tables. + +.. code-block:: c + + struct gpiod_hog gpio_hog_table[] = { + GPIO_HOG("gpio.0", 10, "foo", GPIO_ACTIVE_LOW, GPIOD_OUT_HIGH), + { } + }; + +And the table can be added to the board code as follows:: + + gpiod_add_hogs(gpio_hog_table); + +The line will be hogged as soon as the gpiochip is created or - in case the +chip was created earlier - when the hog table is registered. + +Arrays of pins +-------------- +In addition to requesting pins belonging to a function one by one, a device may +also request an array of pins assigned to the function. The way those pins are +mapped to the device determines if the array qualifies for fast bitmap +processing. If yes, a bitmap is passed over get/set array functions directly +between a caller and a respective .get/set_multiple() callback of a GPIO chip. + +In order to qualify for fast bitmap processing, the array must meet the +following requirements: + +- pin hardware number of array member 0 must also be 0, +- pin hardware numbers of consecutive array members which belong to the same + chip as member 0 does must also match their array indexes. + +Otherwise fast bitmap processing path is not used in order to avoid consecutive +pins which belong to the same chip but are not in hardware order being processed +separately. + +If the array applies for fast bitmap processing path, pins which belong to +different chips than member 0 does, as well as those with indexes different from +their hardware pin numbers, are excluded from the fast path, both input and +output. Moreover, open drain and open source pins are excluded from fast bitmap +output processing. diff --git a/Documentation/driver-api/gpio/bt8xxgpio.rst b/Documentation/driver-api/gpio/bt8xxgpio.rst new file mode 100644 index 000000000..d7e75f123 --- /dev/null +++ b/Documentation/driver-api/gpio/bt8xxgpio.rst @@ -0,0 +1,62 @@ +=================================================================== +A driver for a selfmade cheap BT8xx based PCI GPIO-card (bt8xxgpio) +=================================================================== + +For advanced documentation, see https://bues.ch/cms/unmaintained/btgpio.html + +A generic digital 24-port PCI GPIO card can be built out of an ordinary +Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The +Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily +find them used for low prices on the net. + +The bt8xx chip does have 24 digital GPIO ports. +These ports are accessible via 24 pins on the SMD chip package. + + +How to physically access the GPIO pins +====================================== + +The are several ways to access these pins. One might unsolder the whole chip +and put it on a custom PCI board, or one might only unsolder each individual +GPIO pin and solder that to some tiny wire. As the chip package really is tiny +there are some advanced soldering skills needed in any case. + +The physical pinouts are drawn in the following ASCII art. +The GPIO pins are marked with G00-G23:: + + G G G G G G G G G G G G G G G G G G + 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + --------------------------------------------------------------------------- + --| ^ ^ |-- + --| pin 86 pin 67 |-- + --| |-- + --| pin 61 > |-- G18 + --| |-- G19 + --| |-- G20 + --| |-- G21 + --| |-- G22 + --| pin 56 > |-- G23 + --| |-- + --| Brooktree 878/879 |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| |-- + --| O |-- + --| |-- + --------------------------------------------------------------------------- + | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + ^ + This is pin 1 + diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst new file mode 100644 index 000000000..de6fc79ad --- /dev/null +++ b/Documentation/driver-api/gpio/consumer.rst @@ -0,0 +1,468 @@ +================================== +GPIO Descriptor Consumer Interface +================================== + +This document describes the consumer interface of the GPIO framework. Note that +it describes the new descriptor-based interface. For a description of the +deprecated integer-based GPIO interface please refer to legacy.rst. + + +Guidelines for GPIOs consumers +============================== + +Drivers that can't work without standard GPIO calls should have Kconfig entries +that depend on GPIOLIB or select GPIOLIB. The functions that allow a driver to +obtain and use GPIOs are available by including the following file:: + + #include + +There are static inline stubs for all functions in the header file in the case +where GPIOLIB is disabled. When these stubs are called they will emit +warnings. These stubs are used for two use cases: + +- Simple compile coverage with e.g. COMPILE_TEST - it does not matter that + the current platform does not enable or select GPIOLIB because we are not + going to execute the system anyway. + +- Truly optional GPIOLIB support - where the driver does not really make use + of the GPIOs on certain compile-time configurations for certain systems, but + will use it under other compile-time configurations. In this case the + consumer must make sure not to call into these functions, or the user will + be met with console warnings that may be perceived as intimidating. + +All the functions that work with the descriptor-based GPIO interface are +prefixed with ``gpiod_``. The ``gpio_`` prefix is used for the legacy +interface. No other function in the kernel should use these prefixes. The use +of the legacy functions is strongly discouraged, new code should use + and descriptors exclusively. + + +Obtaining and Disposing GPIOs +============================= + +With the descriptor-based interface, GPIOs are identified with an opaque, +non-forgeable handler that must be obtained through a call to one of the +gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the +device that will use the GPIO and the function the requested GPIO is supposed to +fulfill:: + + struct gpio_desc *gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) + +If a function is implemented by using several GPIOs together (e.g. a simple LED +device that displays digits), an additional index argument can be specified:: + + struct gpio_desc *gpiod_get_index(struct device *dev, + const char *con_id, unsigned int idx, + enum gpiod_flags flags) + +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/driver-api/gpio/board.rst + +The flags parameter is used to optionally specify a direction and initial value +for the GPIO. Values can be: + +* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set + later with one of the dedicated functions. +* GPIOD_IN to initialize the GPIO as input. +* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0. +* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1. +* GPIOD_OUT_LOW_OPEN_DRAIN same as GPIOD_OUT_LOW but also enforce the line + to be electrically used with open drain. +* GPIOD_OUT_HIGH_OPEN_DRAIN same as GPIOD_OUT_HIGH but also enforce the line + to be electrically used with open drain. + +Note that the initial value is *logical* and the physical line level depends on +whether the line is configured active high or active low (see +:ref:`active_low_semantics`). + +The two last flags are used for use cases where open drain is mandatory, such +as I2C: if the line is not already configured as open drain in the mappings +(see board.rst), then open drain will be enforced anyway and a warning will be +printed that the board configuration needs to be updated to match the use case. + +Both functions return either a valid GPIO descriptor, or an error code checkable +with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned +if and only if no GPIO has been assigned to the device/function/index triplet, +other error codes are used for cases where a GPIO has been assigned but an error +occurred while trying to acquire it. This is useful to discriminate between mere +errors and an absence of GPIO for optional GPIO parameters. For the common +pattern where a GPIO is optional, the gpiod_get_optional() and +gpiod_get_index_optional() functions can be used. These functions return NULL +instead of -ENOENT if no GPIO has been assigned to the requested function:: + + struct gpio_desc *gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *gpiod_get_index_optional(struct device *dev, + const char *con_id, + unsigned int index, + enum gpiod_flags flags) + +Note that gpio_get*_optional() functions (and their managed variants), unlike +the rest of gpiolib API, also return NULL when gpiolib support is disabled. +This is helpful to driver authors, since they do not need to special case +-ENOSYS return codes. System integrators should however be careful to enable +gpiolib on systems that need it. + +For a function using multiple GPIOs all of those can be obtained with one call:: + + struct gpio_descs *gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +This function returns a struct gpio_descs which contains an array of +descriptors. It also contains a pointer to a gpiolib private structure which, +if passed back to get/set array functions, may speed up I/O processing:: + + struct gpio_descs { + struct gpio_array *info; + unsigned int ndescs; + struct gpio_desc *desc[]; + } + +The following function returns NULL instead of -ENOENT if no GPIOs have been +assigned to the requested function:: + + struct gpio_descs *gpiod_get_array_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +Device-managed variants of these functions are also defined:: + + struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_desc *devm_gpiod_get_index_optional(struct device *dev, + const char *con_id, + unsigned int index, + enum gpiod_flags flags) + + struct gpio_descs *devm_gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + + struct gpio_descs *devm_gpiod_get_array_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) + +A GPIO descriptor can be disposed of using the gpiod_put() function:: + + void gpiod_put(struct gpio_desc *desc) + +For an array of GPIOs this function can be used:: + + void gpiod_put_array(struct gpio_descs *descs) + +It is strictly forbidden to use a descriptor after calling these functions. +It is also not allowed to individually release descriptors (using gpiod_put()) +from an array acquired with gpiod_get_array(). + +The device-managed variants are, unsurprisingly:: + + void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) + + void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs) + + +Using GPIOs +=========== + +Setting Direction +----------------- +The first thing a driver must do with a GPIO is setting its direction. If no +direction-setting flags have been given to gpiod_get*(), this is done by +invoking one of the gpiod_direction_*() functions:: + + int gpiod_direction_input(struct gpio_desc *desc) + int gpiod_direction_output(struct gpio_desc *desc, int value) + +The return value is zero for success, else a negative errno. It should be +checked, since the get/set calls don't return errors and since misconfiguration +is possible. You should normally issue these calls from a task context. However, +for spinlock-safe GPIOs it is OK to use them before tasking is enabled, as part +of early board setup. + +For output GPIOs, the value provided becomes the initial output value. This +helps avoid signal glitching during system startup. + +A driver can also query the current direction of a GPIO:: + + int gpiod_get_direction(const struct gpio_desc *desc) + +This function returns 0 for output, 1 for input, or an error code in case of error. + +Be aware that there is no default direction for GPIOs. Therefore, **using a GPIO +without setting its direction first is illegal and will result in undefined +behavior!** + + +Spinlock-Safe GPIO Access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. Those +don't need to sleep, and can safely be done from inside hard (non-threaded) IRQ +handlers and similar contexts. + +Use the following calls to access GPIOs from an atomic context:: + + int gpiod_get_value(const struct gpio_desc *desc); + void gpiod_set_value(struct gpio_desc *desc, int value); + +The values are boolean, zero for low, nonzero for high. When reading the value +of an output pin, the value returned should be what's seen on the pin. That +won't always match the specified output value, because of issues including +open-drain signaling and output latencies. + +The get/set calls do not return errors because "invalid GPIO" should have been +reported earlier from gpiod_direction_*(). However, note that not all platforms +can read the value of output pins; those that can't should always return zero. +Also, using these calls for GPIOs that can't safely be accessed without sleeping +(see below) is an error. + + +GPIO Access That May Sleep +-------------------------- +Some GPIO controllers must be accessed using message based buses like I2C or +SPI. Commands to read or write those GPIO values require waiting to get to the +head of a queue to transmit a command and get its response. This requires +sleeping, which can't be done from inside IRQ handlers. + +Platforms that support this type of GPIO distinguish them from other GPIOs by +returning nonzero from this call:: + + int gpiod_cansleep(const struct gpio_desc *desc) + +To access such GPIOs, a different set of accessors is defined:: + + int gpiod_get_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) + +Accessing such GPIOs requires a context which may sleep, for example a threaded +IRQ handler, and those accessors must be used instead of spinlock-safe +accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work on GPIOs +that can't be accessed from hardIRQ handlers, these calls act the same as the +spinlock-safe calls. + + +.. _active_low_semantics: + +The active low and open drain semantics +--------------------------------------- +As a consumer should not have to care about the physical line level, all of the +gpiod_set_value_xxx() or gpiod_set_array_value_xxx() functions operate with +the *logical* value. With this they take the active low property into account. +This means that they check whether the GPIO is configured to be active low, +and if so, they manipulate the passed value before the physical line level is +driven. + +The same is applicable for open drain or open source output lines: those do not +actively drive their output high (open drain) or low (open source), they just +switch their output to a high impedance value. The consumer should not need to +care. (For details read about open drain in driver.rst.) + +With this, all the gpiod_set_(array)_value_xxx() functions interpret the +parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line +level will be driven accordingly. + +As an example, if the active low property for a dedicated GPIO is set, and the +gpiod_set_(array)_value_xxx() passes "asserted" ("1"), the physical line level +will be driven low. + +To summarize:: + + Function (example) line property physical line + gpiod_set_raw_value(desc, 0); don't care low + gpiod_set_raw_value(desc, 1); don't care high + gpiod_set_value(desc, 0); default (active high) low + gpiod_set_value(desc, 1); default (active high) high + gpiod_set_value(desc, 0); active low high + gpiod_set_value(desc, 1); active low low + gpiod_set_value(desc, 0); open drain low + gpiod_set_value(desc, 1); open drain high impedance + gpiod_set_value(desc, 0); open source high impedance + gpiod_set_value(desc, 1); open source high + +It is possible to override these semantics using the set_raw/get_raw functions +but it should be avoided as much as possible, especially by system-agnostic drivers +which should not need to care about the actual physical line level and worry about +the logical value instead. + + +Accessing raw GPIO values +------------------------- +Consumers exist that need to manage the logical state of a GPIO line, i.e. the value +their device will actually receive, no matter what lies between it and the GPIO +line. + +The following set of calls ignore the active-low or open drain property of a GPIO and +work on the raw line value:: + + int gpiod_get_raw_value(const struct gpio_desc *desc) + void gpiod_set_raw_value(struct gpio_desc *desc, int value) + int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) + void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) + int gpiod_direction_output_raw(struct gpio_desc *desc, int value) + +The active low state of a GPIO can also be queried and toggled using the +following calls:: + + int gpiod_is_active_low(const struct gpio_desc *desc) + void gpiod_toggle_active_low(struct gpio_desc *desc) + +Note that these functions should only be used with great moderation; a driver +should not have to care about the physical line level or open drain semantics. + + +Access multiple GPIOs with a single function call +------------------------------------------------- +The following functions get or set the values of an array of GPIOs:: + + int gpiod_get_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + int gpiod_get_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap); + + int gpiod_set_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + int gpiod_set_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + struct gpio_array *array_info, + unsigned long *value_bitmap) + +The array can be an arbitrary set of GPIOs. The functions will try to access +GPIOs belonging to the same bank or chip simultaneously if supported by the +corresponding chip driver. In that case a significantly improved performance +can be expected. If simultaneous access is not possible the GPIOs will be +accessed sequentially. + +The functions take four arguments: + + * array_size - the number of array elements + * desc_array - an array of GPIO descriptors + * array_info - optional information obtained from gpiod_get_array() + * value_bitmap - a bitmap to store the GPIOs' values (get) or + a bitmap of values to assign to the GPIOs (set) + +The descriptor array can be obtained using the gpiod_get_array() function +or one of its variants. If the group of descriptors returned by that function +matches the desired group of GPIOs, those GPIOs can be accessed by simply using +the struct gpio_descs returned by gpiod_get_array():: + + struct gpio_descs *my_gpio_descs = gpiod_get_array(...); + gpiod_set_array_value(my_gpio_descs->ndescs, my_gpio_descs->desc, + my_gpio_descs->info, my_gpio_value_bitmap); + +It is also possible to access a completely arbitrary array of descriptors. The +descriptors may be obtained using any combination of gpiod_get() and +gpiod_get_array(). Afterwards the array of descriptors has to be setup +manually before it can be passed to one of the above functions. In that case, +array_info should be set to NULL. + +Note that for optimal performance GPIOs belonging to the same chip should be +contiguous within the array of descriptors. + +Still better performance may be achieved if array indexes of the descriptors +match hardware pin numbers of a single chip. If an array passed to a get/set +array function matches the one obtained from gpiod_get_array() and array_info +associated with the array is also passed, the function may take a fast bitmap +processing path, passing the value_bitmap argument directly to the respective +.get/set_multiple() callback of the chip. That allows for utilization of GPIO +banks as data I/O ports without much loss of performance. + +The return value of gpiod_get_array_value() and its variants is 0 on success +or negative on error. Note the difference to gpiod_get_value(), which returns +0 or 1 on success to convey the GPIO value. With the array functions, the GPIO +values are stored in value_array rather than passed back as return value. + + +GPIOs mapped to IRQs +-------------------- +GPIO lines can quite often be used as IRQs. You can get the IRQ number +corresponding to a given GPIO using the following call:: + + int gpiod_to_irq(const struct gpio_desc *desc) + +It will return an IRQ number, or a negative errno code if the mapping can't be +done (most likely because that particular GPIO cannot be used as IRQ). It is an +unchecked error to use a GPIO that wasn't set up as an input using +gpiod_direction_input(), or to use an IRQ number that didn't originally come +from gpiod_to_irq(). gpiod_to_irq() is not allowed to sleep. + +Non-error values returned from gpiod_to_irq() can be passed to request_irq() or +free_irq(). They will often be stored into IRQ resources for platform devices, +by the board-specific initialization code. Note that IRQ trigger options are +part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are system wakeup +capabilities. + + +GPIOs and ACPI +============== + +On ACPI systems, GPIOs are described by GpioIo()/GpioInt() resources listed by +the _CRS configuration objects of devices. Those resources do not provide +connection IDs (names) for GPIOs, so it is necessary to use an additional +mechanism for this purpose. + +Systems compliant with ACPI 5.1 or newer may provide a _DSD configuration object +which, among other things, may be used to provide connection IDs for specific +GPIOs described by the GpioIo()/GpioInt() resources in _CRS. If that is the +case, it will be handled by the GPIO subsystem automatically. However, if the +_DSD is not present, the mappings between GpioIo()/GpioInt() resources and GPIO +connection IDs need to be provided by device drivers. + +For details refer to Documentation/firmware-guide/acpi/gpio-properties.rst + + +Interacting With the Legacy GPIO Subsystem +========================================== +Many kernel subsystems and drivers still handle GPIOs using the legacy +integer-based interface. It is strongly recommended to update these to the new +gpiod interface. For cases where both interfaces need to be used, the following +two functions allow to convert a GPIO descriptor into the GPIO integer namespace +and vice-versa:: + + int desc_to_gpio(const struct gpio_desc *desc) + struct gpio_desc *gpio_to_desc(unsigned gpio) + +The GPIO number returned by desc_to_gpio() can safely be used as a parameter of +the gpio\_*() functions for as long as the GPIO descriptor `desc` is not freed. +All the same, a GPIO number passed to gpio_to_desc() must first be properly +acquired using e.g. gpio_request_one(), and the returned GPIO descriptor is only +considered valid until that GPIO number is released using gpio_free(). + +Freeing a GPIO obtained by one API with the other API is forbidden and an +unchecked error. diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst new file mode 100644 index 000000000..bf6319cc5 --- /dev/null +++ b/Documentation/driver-api/gpio/driver.rst @@ -0,0 +1,778 @@ +===================== +GPIO Driver Interface +===================== + +This document serves as a guide for writers of GPIO chip drivers. + +Each GPIO controller driver needs to include the following header, which defines +the structures used to define a GPIO driver:: + + #include + + +Internal Representation of GPIOs +================================ + +A GPIO chip handles one or more GPIO lines. To be considered a GPIO chip, the +lines must conform to the definition: General Purpose Input/Output. If the +line is not general purpose, it is not GPIO and should not be handled by a +GPIO chip. The use case is the indicative: certain lines in a system may be +called GPIO but serve a very particular purpose thus not meeting the criteria +of a general purpose I/O. On the other hand a LED driver line may be used as a +GPIO and should therefore still be handled by a GPIO chip driver. + +Inside a GPIO driver, individual GPIO lines are identified by their hardware +number, sometime also referred to as ``offset``, which is a unique number +between 0 and n-1, n being the number of GPIOs managed by the chip. + +The hardware GPIO number should be something intuitive to the hardware, for +example if a system uses a memory-mapped set of I/O-registers where 32 GPIO +lines are handled by one bit per line in a 32-bit register, it makes sense to +use hardware offsets 0..31 for these, corresponding to bits 0..31 in the +register. + +This number is purely internal: the hardware number of a particular GPIO +line is never made visible outside of the driver. + +On top of this internal number, each GPIO line also needs to have a global +number in the integer GPIO namespace so that it can be used with the legacy GPIO +interface. Each chip must thus have a "base" number (which can be automatically +assigned), and for each GPIO line the global number will be (base + hardware +number). Although the integer representation is considered deprecated, it still +has many users and thus needs to be maintained. + +So for example one platform could use global numbers 32-159 for GPIOs, with a +controller defining 128 GPIOs at a "base" of 32 ; while another platform uses +global numbers 0..63 with one set of GPIO controllers, 64-79 with another type +of GPIO controller, and on one particular board 80-95 with an FPGA. The legacy +numbers need not be contiguous; either of those platforms could also use numbers +2000-2063 to identify GPIO lines in a bank of I2C GPIO expanders. + + +Controller Drivers: gpio_chip +============================= + +In the gpiolib framework each GPIO controller is packaged as a "struct +gpio_chip" (see for its complete definition) with members +common to each controller of that type, these should be assigned by the +driver code: + + - methods to establish GPIO line direction + - methods used to access GPIO line values + - method to set electrical configuration for a given GPIO line + - method to return the IRQ number associated to a given GPIO line + - flag saying whether calls to its methods may sleep + - optional line names array to identify lines + - optional debugfs dump method (showing extra state information) + - optional base number (will be automatically assigned if omitted) + - optional label for diagnostics and GPIO chip mapping using platform data + +The code implementing a gpio_chip should support multiple instances of the +controller, preferably using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(), gpiochip_add_data(), or +devm_gpiochip_add_data(). Removing a GPIO controller should be rare; use +gpiochip_remove() when it is unavoidable. + +Often a gpio_chip is part of an instance-specific structure with states not +exposed by the GPIO interfaces, such as addressing, power management, and more. +Chips such as audio codecs will have complex non-GPIO states. + +Any debugfs dump method should normally ignore lines which haven't been +requested. They can use gpiochip_is_requested(), which returns either +NULL or the label associated with that GPIO line when it was requested. + +Realtime considerations: the GPIO driver should not use spinlock_t or any +sleepable APIs (like PM runtime) in its gpio_chip implementation (.get/.set +and direction control callbacks) if it is expected to call GPIO APIs from +atomic context on realtime kernels (inside hard IRQ handlers and similar +contexts). Normally this should not be required. + + +GPIO electrical configuration +----------------------------- + +GPIO lines can be configured for several electrical modes of operation by using +the .set_config() callback. Currently this API supports setting: + +- Debouncing +- Single-ended modes (open drain/open source) +- Pull up and pull down resistor enablement + +These settings are described below. + +The .set_config() callback uses the same enumerators and configuration +semantics as the generic pin control drivers. This is not a coincidence: it is +possible to assign the .set_config() to the function gpiochip_generic_config() +which will result in pinctrl_gpio_set_config() being called and eventually +ending up in the pin control back-end "behind" the GPIO controller, usually +closer to the actual pins. This way the pin controller can manage the below +listed GPIO configurations. + +If a pin controller back-end is used, the GPIO controller or hardware +description needs to provide "GPIO ranges" mapping the GPIO line offsets to pin +numbers on the pin controller so they can properly cross-reference each other. + + +GPIO lines with debounce support +-------------------------------- + +Debouncing is a configuration set to a pin indicating that it is connected to +a mechanical switch or button, or similar that may bounce. Bouncing means the +line is pulled high/low quickly at very short intervals for mechanical +reasons. This can result in the value being unstable or irqs firing repeatedly +unless the line is debounced. + +Debouncing in practice involves setting up a timer when something happens on +the line, wait a little while and then sample the line again, so see if it +still has the same value (low or high). This could also be repeated by a clever +state machine, waiting for a line to become stable. In either case, it sets +a certain number of milliseconds for debouncing, or just "on/off" if that time +is not configurable. + + +GPIO lines with open drain/source support +----------------------------------------- + +Open drain (CMOS) or open collector (TTL) means the line is not actively driven +high: instead you provide the drain/collector as output, so when the transistor +is not open, it will present a high-impedance (tristate) to the external rail:: + + + CMOS CONFIGURATION TTL CONFIGURATION + + ||--- out +--- out + in ----|| |/ + ||--+ in ----| + | |\ + GND GND + +This configuration is normally used as a way to achieve one of two things: + +- Level-shifting: to reach a logical level higher than that of the silicon + where the output resides. + +- Inverse wire-OR on an I/O line, for example a GPIO line, making it possible + for any driving stage on the line to drive it low even if any other output + to the same line is simultaneously driving it high. A special case of this + is driving the SCL and SDA lines of an I2C bus, which is by definition a + wire-OR bus. + +Both use cases require that the line be equipped with a pull-up resistor. This +resistor will make the line tend to high level unless one of the transistors on +the rail actively pulls it down. + +The level on the line will go as high as the VDD on the pull-up resistor, which +may be higher than the level supported by the transistor, achieving a +level-shift to the higher VDD. + +Integrated electronics often have an output driver stage in the form of a CMOS +"totem-pole" with one N-MOS and one P-MOS transistor where one of them drives +the line high and one of them drives the line low. This is called a push-pull +output. The "totem-pole" looks like so:: + + VDD + | + OD ||--+ + +--/ ---o|| P-MOS-FET + | ||--+ + IN --+ +----- out + | ||--+ + +--/ ----|| N-MOS-FET + OS ||--+ + | + GND + +The desired output signal (e.g. coming directly from some GPIO output register) +arrives at IN. The switches named "OD" and "OS" are normally closed, creating +a push-pull circuit. + +Consider the little "switches" named "OD" and "OS" that enable/disable the +P-MOS or N-MOS transistor right after the split of the input. As you can see, +either transistor will go totally numb if this switch is open. The totem-pole +is then halved and give high impedance instead of actively driving the line +high or low respectively. That is usually how software-controlled open +drain/source works. + +Some GPIO hardware come in open drain / open source configuration. Some are +hard-wired lines that will only support open drain or open source no matter +what: there is only one transistor there. Some are software-configurable: +by flipping a bit in a register the output can be configured as open drain +or open source, in practice by flicking open the switches labeled "OD" and "OS" +in the drawing above. + +By disabling the P-MOS transistor, the output can be driven between GND and +high impedance (open drain), and by disabling the N-MOS transistor, the output +can be driven between VDD and high impedance (open source). In the first case, +a pull-up resistor is needed on the outgoing rail to complete the circuit, and +in the second case, a pull-down resistor is needed on the rail. + +Hardware that supports open drain or open source or both, can implement a +special callback in the gpio_chip: .set_config() that takes a generic +pinconf packed value telling whether to configure the line as open drain, +open source or push-pull. This will happen in response to the +GPIO_OPEN_DRAIN or GPIO_OPEN_SOURCE flag set in the machine file, or coming +from other hardware descriptions. + +If this state can not be configured in hardware, i.e. if the GPIO hardware does +not support open drain/open source in hardware, the GPIO library will instead +use a trick: when a line is set as output, if the line is flagged as open +drain, and the IN output value is low, it will be driven low as usual. But +if the IN output value is set to high, it will instead *NOT* be driven high, +instead it will be switched to input, as input mode is an equivalent to +high impedance, thus achieving an "open drain emulation" of sorts: electrically +the behaviour will be identical, with the exception of possible hardware glitches +when switching the mode of the line. + +For open source configuration the same principle is used, just that instead +of actively driving the line low, it is set to input. + + +GPIO lines with pull up/down resistor support +--------------------------------------------- + +A GPIO line can support pull-up/down using the .set_config() callback. This +means that a pull up or pull-down resistor is available on the output of the +GPIO line, and this resistor is software controlled. + +In discrete designs, a pull-up or pull-down resistor is simply soldered on +the circuit board. This is not something we deal with or model in software. The +most you will think about these lines is that they will very likely be +configured as open drain or open source (see the section above). + +The .set_config() callback can only turn pull up or down on and off, and will +no have any semantic knowledge about the resistance used. It will only say +switch a bit in a register enabling or disabling pull-up or pull-down. + +If the GPIO line supports shunting in different resistance values for the +pull-up or pull-down resistor, the GPIO chip callback .set_config() will not +suffice. For these complex use cases, a combined GPIO chip and pin controller +need to be implemented, as the pin config interface of a pin controller +supports more versatile control over electrical properties and can handle +different pull-up or pull-down resistance values. + + +GPIO drivers providing IRQs +=========================== + +It is custom that GPIO drivers (GPIO chips) are also providing interrupts, +most often cascaded off a parent interrupt controller, and in some special +cases the GPIO logic is melded with a SoC's primary interrupt controller. + +The IRQ portions of the GPIO block are implemented using an irq_chip, using +the header . So this combined driver is utilizing two sub- +systems simultaneously: gpio and irq. + +It is legal for any IRQ consumer to request an IRQ from any irqchip even if it +is a combined GPIO+IRQ driver. The basic premise is that gpio_chip and +irq_chip are orthogonal, and offering their services independent of each +other. + +gpiod_to_irq() is just a convenience function to figure out the IRQ for a +certain GPIO line and should not be relied upon to have been called before +the IRQ is used. + +Always prepare the hardware and make it ready for action in respective +callbacks from the GPIO and irq_chip APIs. Do not rely on gpiod_to_irq() having +been called first. + +We can divide GPIO irqchips in two broad categories: + +- CASCADED INTERRUPT CHIPS: this means that the GPIO chip has one common + interrupt output line, which is triggered by any enabled GPIO line on that + chip. The interrupt output line will then be routed to an parent interrupt + controller one level up, in the most simple case the systems primary + interrupt controller. This is modeled by an irqchip that will inspect bits + inside the GPIO controller to figure out which line fired it. The irqchip + part of the driver needs to inspect registers to figure this out and it + will likely also need to acknowledge that it is handling the interrupt + by clearing some bit (sometime implicitly, by just reading a status + register) and it will often need to set up the configuration such as + edge sensitivity (rising or falling edge, or high/low level interrupt for + example). + +- HIERARCHICAL INTERRUPT CHIPS: this means that each GPIO line has a dedicated + irq line to a parent interrupt controller one level up. There is no need + to inquire the GPIO hardware to figure out which line has fired, but it + may still be necessary to acknowledge the interrupt and set up configuration + such as edge sensitivity. + +Realtime considerations: a realtime compliant GPIO driver should not use +spinlock_t or any sleepable APIs (like PM runtime) as part of its irqchip +implementation. + +- spinlock_t should be replaced with raw_spinlock_t.[1] +- If sleepable APIs have to be used, these can be done from the .irq_bus_lock() + and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks + on an irqchip. Create the callbacks if needed.[2] + + +Cascaded GPIO irqchips +---------------------- + +Cascaded GPIO irqchips usually fall in one of three categories: + +- CHAINED CASCADED GPIO IRQCHIPS: these are usually the type that is embedded on + an SoC. This means that there is a fast IRQ flow handler for the GPIOs that + gets called in a chain from the parent IRQ handler, most typically the + system interrupt controller. This means that the GPIO irqchip handler will + be called immediately from the parent irqchip, while holding the IRQs + disabled. The GPIO irqchip will then end up calling something like this + sequence in its interrupt handler:: + + static irqreturn_t foo_gpio_irq(int irq, void *data) + chained_irq_enter(...); + generic_handle_irq(...); + chained_irq_exit(...); + + Chained GPIO irqchips typically can NOT set the .can_sleep flag on + struct gpio_chip, as everything happens directly in the callbacks: no + slow bus traffic like I2C can be used. + + Realtime considerations: Note that chained IRQ handlers will not be forced + threaded on -RT. As a result, spinlock_t or any sleepable APIs (like PM + runtime) can't be used in a chained IRQ handler. + + If required (and if it can't be converted to the nested threaded GPIO irqchip, + see below) a chained IRQ handler can be converted to generic irq handler and + this way it will become a threaded IRQ handler on -RT and a hard IRQ handler + on non-RT (for example, see [3]). + + The generic_handle_irq() is expected to be called with IRQ disabled, + so the IRQ core will complain if it is called from an IRQ handler which is + forced to a thread. The "fake?" raw lock can be used to work around this + problem:: + + raw_spinlock_t wa_lock; + static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) + unsigned long wa_lock_flags; + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags); + generic_handle_irq(irq_find_mapping(bank->chip.irq.domain, bit)); + raw_spin_unlock_irqrestore(&bank->wa_lock, wa_lock_flags); + +- GENERIC CHAINED GPIO IRQCHIPS: these are the same as "CHAINED GPIO irqchips", + but chained IRQ handlers are not used. Instead GPIO IRQs dispatching is + performed by generic IRQ handler which is configured using request_irq(). + The GPIO irqchip will then end up calling something like this sequence in + its interrupt handler:: + + static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id) + for each detected GPIO IRQ + generic_handle_irq(...); + + Realtime considerations: this kind of handlers will be forced threaded on -RT, + and as result the IRQ core will complain that generic_handle_irq() is called + with IRQ enabled and the same work-around as for "CHAINED GPIO irqchips" can + be applied. + +- NESTED THREADED GPIO IRQCHIPS: these are off-chip GPIO expanders and any + other GPIO irqchip residing on the other side of a sleeping bus such as I2C + or SPI. + + Of course such drivers that need slow bus traffic to read out IRQ status and + similar, traffic which may in turn incur other IRQs to happen, cannot be + handled in a quick IRQ handler with IRQs disabled. Instead they need to spawn + a thread and then mask the parent IRQ line until the interrupt is handled + by the driver. The hallmark of this driver is to call something like + this in its interrupt handler:: + + static irqreturn_t foo_gpio_irq(int irq, void *data) + ... + handle_nested_irq(irq); + + The hallmark of threaded GPIO irqchips is that they set the .can_sleep + flag on struct gpio_chip to true, indicating that this chip may sleep + when accessing the GPIOs. + + These kinds of irqchips are inherently realtime tolerant as they are + already set up to handle sleeping contexts. + + +Infrastructure helpers for GPIO irqchips +---------------------------------------- + +To help out in handling the set-up and management of GPIO irqchips and the +associated irqdomain and resource allocation callbacks. These are activated +by selecting the Kconfig symbol GPIOLIB_IRQCHIP. If the symbol +IRQ_DOMAIN_HIERARCHY is also selected, hierarchical helpers will also be +provided. A big portion of overhead code will be managed by gpiolib, +under the assumption that your interrupts are 1-to-1-mapped to the +GPIO line index: + +.. csv-table:: + :header: GPIO line offset, Hardware IRQ + + 0,0 + 1,1 + 2,2 + ...,... + ngpio-1, ngpio-1 + + +If some GPIO lines do not have corresponding IRQs, the bitmask valid_mask +and the flag need_valid_mask in gpio_irq_chip can be used to mask off some +lines as invalid for associating with IRQs. + +The preferred way to set up the helpers is to fill in the +struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip. +If you do this, the additional irq_chip will be set up by gpiolib at the +same time as setting up the rest of the GPIO functionality. The following +is a typical example of a chained cascaded interrupt handler using +the gpio_irq_chip. Note how the mask/unmask (or disable/enable) functions +call into the core gpiolib code: + +.. code-block:: c + + /* Typical state container */ + struct my_gpio { + struct gpio_chip gc; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + int irq; /* from platform etc */ + struct my_gpio *g; + struct gpio_irq_chip *girq; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + girq->parent_handler = ftgpio_gpio_irq_handler; + girq->num_parents = 1; + girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents), + GFP_KERNEL); + if (!girq->parents) + return -ENOMEM; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + girq->parents[0] = irq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +The helper supports using threaded interrupts as well. Then you just request +the interrupt separately and go with it: + +.. code-block:: c + + /* Typical state container */ + struct my_gpio { + struct gpio_chip gc; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + int irq; /* from platform etc */ + struct my_gpio *g; + struct gpio_irq_chip *girq; + + ret = devm_request_threaded_irq(dev, irq, NULL, + irq_thread_fn, IRQF_ONESHOT, "my-chip", g); + if (ret < 0) + return ret; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + /* This will let us handle the parent IRQ in the driver */ + girq->parent_handler = NULL; + girq->num_parents = 0; + girq->parents = NULL; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +The helper supports using hierarchical interrupt controllers as well. +In this case the typical set-up will look like this: + +.. code-block:: c + + /* Typical state container with dynamic irqchip */ + struct my_gpio { + struct gpio_chip gc; + struct fwnode_handle *fwnode; + }; + + static void my_gpio_mask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + /* + * Perform any necessary action to mask the interrupt, + * and then call into the core code to synchronise the + * state. + */ + + gpiochip_disable_irq(gc, hwirq); + irq_mask_mask_parent(d); + } + + static void my_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + gpiochip_enable_irq(gc, hwirq); + + /* + * Perform any necessary action to unmask the interrupt, + * after having called into the core code to synchronise + * the state. + */ + + irq_mask_unmask_parent(d); + } + + /* + * Statically populate the irqchip. Note that it is made const + * (further indicated by the IRQCHIP_IMMUTABLE flag), and that + * the GPIOCHIP_IRQ_RESOURCE_HELPER macro adds some extra + * callbacks to the structure. + */ + static const struct irq_chip my_gpio_irq_chip = { + .name = "my_gpio_irq", + .irq_ack = my_gpio_ack_irq, + .irq_mask = my_gpio_mask_irq, + .irq_unmask = my_gpio_unmask_irq, + .irq_set_type = my_gpio_set_irq_type, + .flags = IRQCHIP_IMMUTABLE, + /* Provide the gpio resource callbacks */ + GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + struct my_gpio *g; + struct gpio_irq_chip *girq; + + /* Get a pointer to the gpio_irq_chip */ + girq = &g->gc.irq; + gpio_irq_chip_set_chip(girq, &my_gpio_irq_chip); + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + girq->fwnode = g->fwnode; + girq->parent_domain = parent; + girq->child_to_parent_hwirq = my_gpio_child_to_parent_hwirq; + + return devm_gpiochip_add_data(dev, &g->gc, g); + +As you can see pretty similar, but you do not supply a parent handler for +the IRQ, instead a parent irqdomain, an fwnode for the hardware and +a function .child_to_parent_hwirq() that has the purpose of looking up +the parent hardware irq from a child (i.e. this gpio chip) hardware irq. +As always it is good to look at examples in the kernel tree for advice +on how to find the required pieces. + +If there is a need to exclude certain GPIO lines from the IRQ domain handled by +these helpers, we can set .irq.need_valid_mask of the gpiochip before +devm_gpiochip_add_data() or gpiochip_add_data() is called. This allocates an +.irq.valid_mask with as many bits set as there are GPIO lines in the chip, each +bit representing line 0..n-1. Drivers can exclude GPIO lines by clearing bits +from this mask. The mask can be filled in the init_valid_mask() callback +that is part of the struct gpio_irq_chip. + +To use the helpers please keep the following in mind: + +- Make sure to assign all relevant members of the struct gpio_chip so that + the irqchip can initialize. E.g. .dev and .can_sleep shall be set up + properly. + +- Nominally set gpio_irq_chip.handler to handle_bad_irq. Then, if your irqchip + is cascaded, set the handler to handle_level_irq() and/or handle_edge_irq() + in the irqchip .set_type() callback depending on what your controller + supports and what is requested by the consumer. + + +Locking IRQ usage +----------------- + +Since GPIO and irq_chip are orthogonal, we can get conflicts between different +use cases. For example a GPIO line used for IRQs should be an input line, +it does not make sense to fire interrupts on an output GPIO. + +If there is competition inside the subsystem which side is using the +resource (a certain GPIO line and register for example) it needs to deny +certain operations and keep track of usage inside of the gpiolib subsystem. + +Input GPIOs can be used as IRQ signals. When this happens, a driver is requested +to mark the GPIO as being used as an IRQ:: + + int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) + +This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock +is released:: + + void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) + +When implementing an irqchip inside a GPIO driver, these two functions should +typically be called in the .startup() and .shutdown() callbacks from the +irqchip. + +When using the gpiolib irqchip helpers, these callbacks are automatically +assigned. + + +Disabling and enabling IRQs +--------------------------- + +In some (fringe) use cases, a driver may be using a GPIO line as input for IRQs, +but occasionally switch that line over to drive output and then back to being +an input with interrupts again. This happens on things like CEC (Consumer +Electronics Control). + +When a GPIO is used as an IRQ signal, then gpiolib also needs to know if +the IRQ is enabled or disabled. In order to inform gpiolib about this, +the irqchip driver should call:: + + void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset) + +This allows drivers to drive the GPIO as an output while the IRQ is +disabled. When the IRQ is enabled again, a driver should call:: + + void gpiochip_enable_irq(struct gpio_chip *chip, unsigned int offset) + +When implementing an irqchip inside a GPIO driver, these two functions should +typically be called in the .irq_disable() and .irq_enable() callbacks from the +irqchip. + +When IRQCHIP_IMMUTABLE is not advertised by the irqchip, these callbacks +are automatically assigned. This behaviour is deprecated and on its way +to be removed from the kernel. + + +Real-Time compliance for GPIO IRQ chips +--------------------------------------- + +Any provider of irqchips needs to be carefully tailored to support Real-Time +preemption. It is desirable that all irqchips in the GPIO subsystem keep this +in mind and do the proper testing to assure they are real time-enabled. + +So, pay attention on above realtime considerations in the documentation. + +The following is a checklist to follow when preparing a driver for real-time +compliance: + +- ensure spinlock_t is not used as part irq_chip implementation +- ensure that sleepable APIs are not used as part irq_chip implementation + If sleepable APIs have to be used, these can be done from the .irq_bus_lock() + and .irq_bus_unlock() callbacks +- Chained GPIO irqchips: ensure spinlock_t or any sleepable APIs are not used + from the chained IRQ handler +- Generic chained GPIO irqchips: take care about generic_handle_irq() calls and + apply corresponding work-around +- Chained GPIO irqchips: get rid of the chained IRQ handler and use generic irq + handler if possible +- regmap_mmio: it is possible to disable internal locking in regmap by setting + .disable_locking and handling the locking in the GPIO driver +- Test your driver with the appropriate in-kernel real-time test cases for both + level and edge IRQs + +* [1] http://www.spinics.net/lists/linux-omap/msg120425.html +* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com +* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com + + +Requesting self-owned GPIO pins +=============================== + +Sometimes it is useful to allow a GPIO chip driver to request its own GPIO +descriptors through the gpiolib API. A GPIO driver can use the following +functions to request and free descriptors:: + + struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc, + u16 hwnum, + const char *label, + enum gpiod_flags flags) + + void gpiochip_free_own_desc(struct gpio_desc *desc) + +Descriptors requested with gpiochip_request_own_desc() must be released with +gpiochip_free_own_desc(). + +These functions must be used with care since they do not affect module use +count. Do not use the functions to request gpio descriptors not owned by the +calling driver. diff --git a/Documentation/driver-api/gpio/drivers-on-gpio.rst b/Documentation/driver-api/gpio/drivers-on-gpio.rst new file mode 100644 index 000000000..af632d764 --- /dev/null +++ b/Documentation/driver-api/gpio/drivers-on-gpio.rst @@ -0,0 +1,114 @@ +============================ +Subsystem drivers using GPIO +============================ + +Note that standard kernel drivers exist for common GPIO tasks and will provide +the right in-kernel and userspace APIs/ABIs for the job, and that these +drivers can quite easily interconnect with other kernel subsystems using +hardware descriptions such as device tree or ACPI: + +- leds-gpio: drivers/leds/leds-gpio.c will handle LEDs connected to GPIO + lines, giving you the LED sysfs interface + +- ledtrig-gpio: drivers/leds/trigger/ledtrig-gpio.c will provide a LED trigger, + i.e. a LED will turn on/off in response to a GPIO line going high or low + (and that LED may in turn use the leds-gpio as per above). + +- gpio-keys: drivers/input/keyboard/gpio_keys.c is used when your GPIO line + can generate interrupts in response to a key press. Also supports debounce. + +- gpio-keys-polled: drivers/input/keyboard/gpio_keys_polled.c is used when your + GPIO line cannot generate interrupts, so it needs to be periodically polled + by a timer. + +- gpio_mouse: drivers/input/mouse/gpio_mouse.c is used to provide a mouse with + up to three buttons by simply using GPIOs and no mouse port. You can cut the + mouse cable and connect the wires to GPIO lines or solder a mouse connector + to the lines for a more permanent solution of this type. + +- gpio-beeper: drivers/input/misc/gpio-beeper.c is used to provide a beep from + an external speaker connected to a GPIO line. + +- extcon-gpio: drivers/extcon/extcon-gpio.c is used when you need to read an + external connector status, such as a headset line for an audio driver or an + HDMI connector. It will provide a better userspace sysfs interface than GPIO. + +- restart-gpio: drivers/power/reset/gpio-restart.c is used to restart/reboot + the system by pulling a GPIO line and will register a restart handler so + userspace can issue the right system call to restart the system. + +- poweroff-gpio: drivers/power/reset/gpio-poweroff.c is used to power the + system down by pulling a GPIO line and will register a pm_power_off() + callback so that userspace can issue the right system call to power down the + system. + +- gpio-gate-clock: drivers/clk/clk-gpio.c is used to control a gated clock + (off/on) that uses a GPIO, and integrated with the clock subsystem. + +- i2c-gpio: drivers/i2c/busses/i2c-gpio.c is used to drive an I2C bus + (two wires, SDA and SCL lines) by hammering (bitbang) two GPIO lines. It will + appear as any other I2C bus to the system and makes it possible to connect + drivers for the I2C devices on the bus like any other I2C bus driver. + +- spi_gpio: drivers/spi/spi-gpio.c is used to drive an SPI bus (variable number + of wires, at least SCK and optionally MISO, MOSI and chip select lines) using + GPIO hammering (bitbang). It will appear as any other SPI bus on the system + and makes it possible to connect drivers for SPI devices on the bus like + any other SPI bus driver. For example any MMC/SD card can then be connected + to this SPI by using the mmc_spi host from the MMC/SD card subsystem. + +- w1-gpio: drivers/w1/masters/w1-gpio.c is used to drive a one-wire bus using + a GPIO line, integrating with the W1 subsystem and handling devices on + the bus like any other W1 device. + +- gpio-fan: drivers/hwmon/gpio-fan.c is used to control a fan for cooling the + system, connected to a GPIO line (and optionally a GPIO alarm line), + presenting all the right in-kernel and sysfs interfaces to make your system + not overheat. + +- gpio-regulator: drivers/regulator/gpio-regulator.c is used to control a + regulator providing a certain voltage by pulling a GPIO line, integrating + with the regulator subsystem and giving you all the right interfaces. + +- gpio-wdt: drivers/watchdog/gpio_wdt.c is used to provide a watchdog timer + that will periodically "ping" a hardware connected to a GPIO line by toggling + it from 1-to-0-to-1. If that hardware does not receive its "ping" + periodically, it will reset the system. + +- gpio-nand: drivers/mtd/nand/raw/gpio.c is used to connect a NAND flash chip + to a set of simple GPIO lines: RDY, NCE, ALE, CLE, NWP. It interacts with the + NAND flash MTD subsystem and provides chip access and partition parsing like + any other NAND driving hardware. + +- ps2-gpio: drivers/input/serio/ps2-gpio.c is used to drive a PS/2 (IBM) serio + bus, data and clock line, by bit banging two GPIO lines. It will appear as + any other serio bus to the system and makes it possible to connect drivers + for e.g. keyboards and other PS/2 protocol based devices. + +- cec-gpio: drivers/media/platform/cec-gpio/ is used to interact with a CEC + Consumer Electronics Control bus using only GPIO. It is used to communicate + with devices on the HDMI bus. + +- gpio-charger: drivers/power/supply/gpio-charger.c is used if you need to do + battery charging and all you have to go by to check the presence of the + AC charger or more complex tasks such as indicating charging status using + nothing but GPIO lines, this driver provides that and also a clearly defined + way to pass the charging parameters from hardware descriptions such as the + device tree. + +- gpio-mux: drivers/mux/gpio.c is used for controlling a multiplexer using + n GPIO lines such that you can mux in 2^n different devices by activating + different GPIO lines. Often the GPIOs are on a SoC and the devices are + some SoC-external entities, such as different components on a PCB that + can be selectively enabled. + +Apart from this there are special GPIO drivers in subsystems like MMC/SD to +read card detect and write protect GPIO lines, and in the TTY serial subsystem +to emulate MCTRL (modem control) signals CTS/RTS by using two GPIO lines. The +MTD NOR flash has add-ons for extra GPIO lines too, though the address bus is +usually connected directly to the flash. + +Use those instead of talking directly to the GPIOs from userspace; they +integrate with kernel frameworks better than your userspace code could. +Needless to say, just using the appropriate kernel drivers will simplify and +speed up your embedded hacking in particular by providing ready-made components. diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst new file mode 100644 index 000000000..1d48fe248 --- /dev/null +++ b/Documentation/driver-api/gpio/index.rst @@ -0,0 +1,50 @@ +=================================== +General Purpose Input/Output (GPIO) +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + using-gpio + driver + consumer + board + drivers-on-gpio + legacy + bt8xxgpio + +Core +==== + +.. kernel-doc:: include/linux/gpio/driver.h + :internal: + +.. kernel-doc:: drivers/gpio/gpiolib.c + :export: + +ACPI support +============ + +.. kernel-doc:: drivers/gpio/gpiolib-acpi.c + :export: + +Device tree support +=================== + +.. kernel-doc:: drivers/gpio/gpiolib-of.c + :export: + +Device-managed API +================== + +.. kernel-doc:: drivers/gpio/gpiolib-devres.c + :export: + +sysfs helpers +============= + +.. kernel-doc:: drivers/gpio/gpiolib-sysfs.c + :export: diff --git a/Documentation/driver-api/gpio/intro.rst b/Documentation/driver-api/gpio/intro.rst new file mode 100644 index 000000000..c9c19243b --- /dev/null +++ b/Documentation/driver-api/gpio/intro.rst @@ -0,0 +1,124 @@ +============ +Introduction +============ + + +GPIO Interfaces +=============== + +The documents in this directory give detailed instructions on how to access +GPIOs in drivers, and how to write a driver for a device that provides GPIOs +itself. + +Due to the history of GPIO interfaces in the kernel, there are two different +ways to obtain and use GPIOs: + + - The descriptor-based interface is the preferred way to manipulate GPIOs, + and is described by all the files in this directory excepted legacy.rst. + - The legacy integer-based interface which is considered deprecated (but still + usable for compatibility reasons) is documented in legacy.rst. + +The remainder of this document applies to the new descriptor-based interface. +legacy.rst contains the same information applied to the legacy +integer-based interface. + + +What is a GPIO? +=============== + +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chips, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial buses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven, supporting "wire-OR" and similar schemes for the + other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card write-protect status, driving +a LED, configuring a transceiver, bit-banging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +Common GPIO Properties +====================== + +These properties are met through all the other documents of the GPIO interface +and it is useful to understand them, especially if you need to define GPIO +mappings. + +Active-High and Active-Low +-------------------------- +It is natural to assume that a GPIO is "active" when its output signal is 1 +("high"), and inactive when it is 0 ("low"). However in practice the signal of a +GPIO may be inverted before is reaches its destination, or a device could decide +to have different conventions about what "active" means. Such decisions should +be transparent to device drivers, therefore it is possible to define a GPIO as +being either active-high ("1" means "active", the default) or active-low ("0" +means "active") so that drivers only need to worry about the logical signal and +not about what happens at the line level. + +Open Drain and Open Source +-------------------------- +Sometimes shared signals need to use "open drain" (where only the low signal +level is actually driven), or "open source" (where only the high signal level is +driven) signaling. That term applies to CMOS transistors; "open collector" is +used for TTL. A pullup or pulldown resistor causes the high or low signal level. +This is sometimes called a "wire-AND"; or more practically, from the negative +logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain and open source outputs; many +don't. When you need open drain signaling but your hardware doesn't directly +support it, there's a common idiom you can use to emulate it with any GPIO pin +that can be used as either an input or an output: + + **LOW**: ``gpiod_direction_output(gpio, 0)`` ... this drives the signal and + overrides the pullup. + + **HIGH**: ``gpiod_direction_input(gpio)`` ... this turns off the output, so + the pullup (or some other device) controls the signal. + +The same logic can be applied to emulate open source signaling, by driving the +high signal and configuring the GPIO as input for low. This open drain/open +source emulation can be handled transparently by the GPIO framework. + +If you are "driving" the signal high but gpiod_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component is +driving the shared signal low. That's not necessarily an error. As one common +example, that's how I2C clocks are stretched: a slave that needs a slower clock +delays the rising edge of SCK, and the I2C master adjusts its signaling rate +accordingly. diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst new file mode 100644 index 000000000..b65059147 --- /dev/null +++ b/Documentation/driver-api/gpio/legacy.rst @@ -0,0 +1,695 @@ +====================== +Legacy GPIO Interfaces +====================== + +This provides an overview of GPIO access conventions on Linux. + +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + + +What is a GPIO? +=============== +A "General Purpose Input/Output" (GPIO) is a flexible software-controlled +digital signal. They are provided from many kinds of chip, and are familiar +to Linux developers working with embedded and custom hardware. Each GPIO +represents a bit connected to a particular pin, or "ball" on Ball Grid Array +(BGA) packages. Board schematics show which external hardware connects to +which GPIOs. Drivers can be written generically, so that board setup code +passes such pin configuration data to drivers. + +System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every +non-dedicated pin can be configured as a GPIO; and most chips have at least +several dozen of them. Programmable logic devices (like FPGAs) can easily +provide GPIOs; multifunction chips like power managers, and audio codecs +often have a few such pins to help with pin scarcity on SOCs; and there are +also "GPIO Expander" chips that connect using the I2C or SPI serial busses. +Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS +firmware knowing how they're used). + +The exact capabilities of GPIOs vary between systems. Common options: + + - Output values are writable (high=1, low=0). Some chips also have + options about how that value is driven, so that for example only one + value might be driven ... supporting "wire-OR" and similar schemes + for the other value (notably, "open drain" signaling). + + - Input values are likewise readable (1, 0). Some chips support readback + of pins configured as "output", which is very useful in such "wire-OR" + cases (to support bidirectional signaling). GPIO controllers may have + input de-glitch/debounce logic, sometimes with software controls. + + - Inputs can often be used as IRQ signals, often edge triggered but + sometimes level triggered. Such IRQs may be configurable as system + wakeup events, to wake the system from a low power state. + + - Usually a GPIO will be configurable as either input or output, as needed + by different product boards; single direction ones exist too. + + - Most GPIOs can be accessed while holding spinlocks, but those accessed + through a serial bus normally can't. Some systems support both types. + +On a given board each GPIO is used for one specific purpose like monitoring +MMC/SD card insertion/removal, detecting card writeprotect status, driving +a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware +watchdog, sensing a switch, and so on. + + +GPIO conventions +================ +Note that this is called a "convention" because you don't need to do it this +way, and it's no crime if you don't. There **are** cases where portability +is not the main issue; GPIOs are often used for the kind of board-specific +glue logic that may even change between board revisions, and can't ever be +used on a board that's wired differently. Only least-common-denominator +functionality can be very portable. Other features are platform-specific, +and that can be critical for glue logic. + +Plus, this doesn't require any implementation framework, just an interface. +One platform might implement it as simple inline functions accessing chip +registers; another might implement it by delegating through abstractions +used for several very different kinds of GPIO controller. (There is some +optional code supporting such an implementation strategy, described later +in this document, but drivers acting as clients to the GPIO interface must +not care how it's implemented.) + +That said, if the convention is supported on their platform, drivers should +use it when possible. Platforms must select GPIOLIB if GPIO functionality +is strictly required. Drivers that can't work without +standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The +GPIO calls are available, either as "real code" or as optimized-away stubs, +when drivers use the include file: + + #include + +If you stick to this convention then it'll be easier for other developers to +see what your code is doing, and help maintain it. + +Note that these operations include I/O barriers on platforms which need to +use them; drivers don't need to add them explicitly. + + +Identifying GPIOs +----------------- +GPIOs are identified by unsigned integers in the range 0..MAX_INT. That +reserves "negative" numbers for other purposes like marking signals as +"not available on this board", or indicating faults. Code that doesn't +touch the underlying hardware treats these integers as opaque cookies. + +Platforms define how they use those integers, and usually #define symbols +for the GPIO lines so that board-specific setup code directly corresponds +to the relevant schematics. In contrast, drivers should only use GPIO +numbers passed to them from that setup code, using platform_data to hold +board-specific pin configuration data (along with other board specific +data they need). That avoids portability problems. + +So for example one platform uses numbers 32-159 for GPIOs; while another +uses numbers 0..63 with one set of GPIO controllers, 64-79 with another +type of GPIO controller, and on one particular board 80-95 with an FPGA. +The numbers need not be contiguous; either of those platforms could also +use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. + +If you want to initialize a structure with an invalid GPIO number, use +some negative number (perhaps "-EINVAL"); that will never be valid. To +test if such number from such a structure could reference a GPIO, you +may use this predicate: + + int gpio_is_valid(int number); + +A number that's not valid will be rejected by calls which may request +or free GPIOs (see below). Other numbers may also be rejected; for +example, a number might be valid but temporarily unused on a given board. + +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. + +Using GPIOs +----------- +The first thing a system should do with a GPIO is allocate it, using +the gpio_request() call; see later. + +One of the next things to do with a GPIO, often in board setup code when +setting up a platform_device using the GPIO, is mark its direction:: + + /* set as input or output, returning 0 or negative errno */ + int gpio_direction_input(unsigned gpio); + int gpio_direction_output(unsigned gpio, int value); + +The return value is zero for success, else a negative errno. It should +be checked, since the get/set calls don't have error returns and since +misconfiguration is possible. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to use them +before tasking is enabled, as part of early board setup. + +For output GPIOs, the value provided becomes the initial output value. +This helps avoid signal glitching during system startup. + +For compatibility with legacy interfaces to GPIOs, setting the direction +of a GPIO implicitly requests that GPIO (see below) if it has not been +requested already. That compatibility is being removed from the optional +gpiolib framework. + +Setting the direction can fail if the GPIO number is invalid, or when +that particular GPIO can't be used in that mode. It's generally a bad +idea to rely on boot firmware to have set the direction correctly, since +it probably wasn't validated to do more than boot Linux. (Similarly, +that board setup code probably needs to multiplex that pin as a GPIO, +and configure pullups/pulldowns appropriately.) + + +Spinlock-Safe GPIO access +------------------------- +Most GPIO controllers can be accessed with memory read/write instructions. +Those don't need to sleep, and can safely be done from inside hard +(nonthreaded) IRQ handlers and similar contexts. + +Use the following calls to access such GPIOs:: + + /* GPIO INPUT: return zero or nonzero */ + int gpio_get_value(unsigned gpio); + + /* GPIO OUTPUT */ + void gpio_set_value(unsigned gpio, int value); + +The values are boolean, zero for low, nonzero for high. When reading the +value of an output pin, the value returned should be what's seen on the +pin ... that won't always match the specified output value, because of +issues including open-drain signaling and output latencies. + +The get/set calls have no error returns because "invalid GPIO" should have +been reported earlier from gpio_direction_*(). However, note that not all +platforms can read the value of output pins; those that can't should always +return zero. Also, using these calls for GPIOs that can't safely be accessed +without sleeping (see below) is an error. + +Platform-specific implementations are encouraged to optimize the two +calls to access the GPIO value in cases where the GPIO number (and for +output, value) are constant. It's normal for them to need only a couple +of instructions in such cases (reading or writing a hardware register), +and not to need spinlocks. Such optimized calls can make bitbanging +applications a lot more efficient (in both space and time) than spending +dozens of instructions on subroutine calls. + + +GPIO access that may sleep +-------------------------- +Some GPIO controllers must be accessed using message based busses like I2C +or SPI. Commands to read or write those GPIO values require waiting to +get to the head of a queue to transmit a command and get its response. +This requires sleeping, which can't be done from inside IRQ handlers. +To access such GPIOs, a different set of accessors is defined:: + + /* GPIO INPUT: return zero or nonzero, might sleep */ + int gpio_get_value_cansleep(unsigned gpio); + + /* GPIO OUTPUT, might sleep */ + void gpio_set_value_cansleep(unsigned gpio, int value); + +Accessing such GPIOs requires a context which may sleep, for example +a threaded IRQ handler, and those accessors must be used instead of +spinlock-safe accessors without the cansleep() name suffix. + +Other than the fact that these accessors might sleep, and will work +on GPIOs that can't be accessed from hardIRQ handlers, these calls act +the same as the spinlock-safe calls. + +**IN ADDITION** calls to setup and configure such GPIOs must be made +from contexts which may sleep, since they may need to access the GPIO +controller chip too (These setup calls are usually made from board +setup or driver probe/teardown code, so this is an easy constraint.):: + + gpio_direction_input() + gpio_direction_output() + gpio_request() + + ## gpio_request_one() + ## gpio_request_array() + ## gpio_free_array() + + gpio_free() + + +Claiming and Releasing GPIOs +---------------------------- +To help catch system configuration errors, two calls are defined:: + + /* request GPIO, returning 0 or negative errno. + * non-null labels may be useful for diagnostics. + */ + int gpio_request(unsigned gpio, const char *label); + + /* release previously-claimed GPIO */ + void gpio_free(unsigned gpio); + +Passing invalid GPIO numbers to gpio_request() will fail, as will requesting +GPIOs that have already been claimed with that call. The return value of +gpio_request() must be checked. You should normally issue these calls from +a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs +before tasking is enabled, as part of early board setup. + +These calls serve two basic purposes. One is marking the signals which +are actually in use as GPIOs, for better diagnostics; systems may have +several hundred potential GPIOs, but often only a dozen are used on any +given board. Another is to catch conflicts, identifying errors when +(a) two or more drivers wrongly think they have exclusive use of that +signal, or (b) something wrongly believes it's safe to remove drivers +needed to manage a signal that's in active use. That is, requesting a +GPIO can serve as a kind of lock. + +Some platforms may also use knowledge about what GPIOs are active for +power management, such as by powering down unused chip sectors and, more +easily, gating off unused clocks. + +For GPIOs that use pins known to the pinctrl subsystem, that subsystem should +be informed of their use; a gpiolib driver's .request() operation may call +pinctrl_gpio_request(), and a gpiolib driver's .free() operation may call +pinctrl_gpio_free(). The pinctrl subsystem allows a pinctrl_gpio_request() +to succeed concurrently with a pin or pingroup being "owned" by a device for +pin multiplexing. + +Any programming of pin multiplexing hardware that is needed to route the +GPIO signal to the appropriate pin should occur within a GPIO driver's +.direction_input() or .direction_output() operations, and occur after any +setup of an output GPIO's value. This allows a glitch-free migration from a +pin's special function to GPIO. This is sometimes required when using a GPIO +to implement a workaround on signals typically driven by a non-GPIO HW block. + +Some platforms allow some or all GPIO signals to be routed to different pins. +Similarly, other aspects of the GPIO or pin may need to be configured, such as +pullup/pulldown. Platform software should arrange that any such details are +configured prior to gpio_request() being called for those GPIOs, e.g. using +the pinctrl subsystem's mapping table, so that GPIO users need not be aware +of these details. + +Also note that it's your responsibility to have stopped using a GPIO +before you free it. + +Considering in most cases GPIOs are actually configured right after they +are claimed, three additional calls are defined:: + + /* request a single GPIO, with initial configuration specified by + * 'flags', identical to gpio_request() wrt other arguments and + * return value + */ + int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); + + /* request multiple GPIOs in a single call + */ + int gpio_request_array(struct gpio *array, size_t num); + + /* release multiple GPIOs in a single call + */ + void gpio_free_array(struct gpio *array, size_t num); + +where 'flags' is currently defined to specify the following properties: + + * GPIOF_DIR_IN - to configure direction as input + * GPIOF_DIR_OUT - to configure direction as output + + * GPIOF_INIT_LOW - as output, set initial level to LOW + * GPIOF_INIT_HIGH - as output, set initial level to HIGH + +since GPIOF_INIT_* are only valid when configured as output, so group valid +combinations as: + + * GPIOF_IN - configure as input + * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW + * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH + +Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is +introduced to encapsulate all three fields as:: + + struct gpio { + unsigned gpio; + unsigned long flags; + const char *label; + }; + +A typical example of usage:: + + static struct gpio leds_gpios[] = { + { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ + { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ + { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ + { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ + { ... }, + }; + + err = gpio_request_one(31, GPIOF_IN, "Reset Button"); + if (err) + ... + + err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + if (err) + ... + + gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + + +GPIOs mapped to IRQs +-------------------- +GPIO numbers are unsigned integers; so are IRQ numbers. These make up +two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can +map between them using calls like:: + + /* map GPIO numbers to IRQ numbers */ + int gpio_to_irq(unsigned gpio); + +Those return either the corresponding number in the other namespace, or +else a negative errno code if the mapping can't be done. (For example, +some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO +number that wasn't set up as an input using gpio_direction_input(), or +to use an IRQ number that didn't originally come from gpio_to_irq(). + +These two mapping calls are expected to cost on the order of a single +addition or subtraction. They're not allowed to sleep. + +Non-error values returned from gpio_to_irq() can be passed to request_irq() +or free_irq(). They will often be stored into IRQ resources for platform +devices, by the board-specific initialization code. Note that IRQ trigger +options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are +system wakeup capabilities. + + +Emulating Open Drain Signals +---------------------------- +Sometimes shared signals need to use "open drain" signaling, where only the +low signal level is actually driven. (That term applies to CMOS transistors; +"open collector" is used for TTL.) A pullup resistor causes the high signal +level. This is sometimes called a "wire-AND"; or more practically, from the +negative logic (low=true) perspective this is a "wire-OR". + +One common example of an open drain signal is a shared active-low IRQ line. +Also, bidirectional data bus signals sometimes use open drain signals. + +Some GPIO controllers directly support open drain outputs; many don't. When +you need open drain signaling but your hardware doesn't directly support it, +there's a common idiom you can use to emulate it with any GPIO pin that can +be used as either an input or an output: + + LOW: gpio_direction_output(gpio, 0) ... this drives the signal + and overrides the pullup. + + HIGH: gpio_direction_input(gpio) ... this turns off the output, + so the pullup (or some other device) controls the signal. + +If you are "driving" the signal high but gpio_get_value(gpio) reports a low +value (after the appropriate rise time passes), you know some other component +is driving the shared signal low. That's not necessarily an error. As one +common example, that's how I2C clocks are stretched: a slave that needs a +slower clock delays the rising edge of SCK, and the I2C master adjusts its +signaling rate accordingly. + + +GPIO controllers and the pinctrl subsystem +------------------------------------------ + +A GPIO controller on a SOC might be tightly coupled with the pinctrl +subsystem, in the sense that the pins can be used by other functions +together with an optional gpio feature. We have already covered the +case where e.g. a GPIO controller need to reserve a pin or set the +direction of a pin by calling any of:: + + pinctrl_gpio_request() + pinctrl_gpio_free() + pinctrl_gpio_direction_input() + pinctrl_gpio_direction_output() + +But how does the pin control subsystem cross-correlate the GPIO +numbers (which are a global business) to a certain pin on a certain +pin controller? + +This is done by registering "ranges" of pins, which are essentially +cross-reference tables. These are described in +Documentation/driver-api/pin-control.rst + +While the pin allocation is totally managed by the pinctrl subsystem, +gpio (under gpiolib) is still maintained by gpio drivers. It may happen +that different pin ranges in a SoC is managed by different gpio drivers. + +This makes it logical to let gpio drivers announce their pin ranges to +the pin ctrl subsystem before it will call 'pinctrl_gpio_request' in order +to request the corresponding pin to be prepared by the pinctrl subsystem +before any gpio usage. + +For this, the gpio controller can register its pin range with pinctrl +subsystem. There are two ways of doing it currently: with or without DT. + +For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. + +For non-DT support, user can call gpiochip_add_pin_range() with appropriate +parameters to register a range of gpio pins with a pinctrl driver. For this +exact name string of pinctrl device has to be passed as one of the +argument to this routine. + + +What do these conventions omit? +=============================== +One of the biggest things these conventions omit is pin multiplexing, since +this is highly chip-specific and nonportable. One platform might not need +explicit multiplexing; another might have just two options for use of any +given pin; another might have eight options per pin; another might be able +to route a given GPIO to any one of several pins. (Yes, those examples all +come from systems that run Linux today.) + +Related to multiplexing is configuration and enabling of the pullups or +pulldowns integrated on some platforms. Not all platforms support them, +or support them in the same way; and any given board might use external +pullups (or pulldowns) so that the on-chip ones should not be used. +(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a +platform-specific issue, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. + +There are other system-specific mechanisms that are not specified here, +like the aforementioned options for input de-glitching and wire-OR output. +Hardware may support reading or writing GPIOs in gangs, but that's usually +configuration dependent: for GPIOs sharing the same bank. (GPIOs are +commonly grouped in banks of 16 or 32, with a given SOC having several such +banks.) Some systems can trigger IRQs from output GPIOs, or read values +from pins not managed as GPIOs. Code relying on such mechanisms will +necessarily be nonportable. + +Dynamic definition of GPIOs is not currently standard; for example, as +a side effect of configuring an add-on board with some GPIO expanders. + + +GPIO implementor's framework (OPTIONAL) +======================================= +As noted earlier, there is an optional implementation framework making it +easier for platforms to support different kinds of GPIO controller using +the same programming interface. This framework is called "gpiolib". + +As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file +will be found there. That will list all the controllers registered through +this framework, and the state of the GPIOs currently in use. + + +Controller Drivers: gpio_chip +----------------------------- +In this framework each GPIO controller is packaged as a "struct gpio_chip" +with information common to each controller of that type: + + - methods to establish GPIO direction + - methods used to access GPIO values + - flag saying whether calls to its methods may sleep + - optional debugfs dump method (showing extra state like pullup config) + - label for diagnostics + +There is also per-instance data, which may come from device.platform_data: +the number of its first GPIO, and how many GPIOs it exposes. + +The code implementing a gpio_chip should support multiple instances of the +controller, possibly using the driver model. That code will configure each +gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be +rare; use gpiochip_remove() when it is unavoidable. + +Most often a gpio_chip is part of an instance-specific structure with state +not exposed by the GPIO interfaces, such as addressing, power management, +and more. Chips such as codecs will have complex non-GPIO state. + +Any debugfs dump method should normally ignore signals which haven't been +requested as GPIOs. They can use gpiochip_is_requested(), which returns +either NULL or the label associated with that GPIO when it was requested. + + +Platform Support +---------------- +To force-enable this framework, a platform's Kconfig will "select" GPIOLIB, +else it is up to the user to configure support for GPIO. + +If neither of these options are selected, the platform does not support +GPIOs through GPIO-lib and the code cannot be enabled by the user. + +Trivial implementations of those functions can directly use framework +code, which always dispatches through the gpio_chip:: + + #define gpio_get_value __gpio_get_value + #define gpio_set_value __gpio_set_value + +Fancier implementations could instead define those as inline functions with +logic optimizing access to specific SOC-based GPIOs. For example, if the +referenced GPIO is the constant "12", getting or setting its value could +cost as little as two or three instructions, never sleeping. When such an +optimization is not possible those calls must delegate to the framework +code, costing at least a few dozen instructions. For bitbanged I/O, such +instruction savings can be significant. + +For SOCs, platform-specific code defines and registers gpio_chip instances +for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to +match chip vendor documentation, and directly match board schematics. They +may well start at zero and go up to a platform-specific limit. Such GPIOs +are normally integrated into platform initialization to make them always be +available, from arch_initcall() or earlier; they can often serve as IRQs. + + +Board Support +------------- +For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi +function devices, FPGAs or CPLDs -- most often board-specific code handles +registering controller devices and ensures that their drivers know what GPIO +numbers to use with gpiochip_add(). Their numbers often start right after +platform-specific GPIOs. + +For example, board setup code could create structures identifying the range +of GPIOs that chip will expose, and passes them to each GPIO expander chip +using platform_data. Then the chip driver's probe() routine could pass that +data to gpiochip_add(). + +Initialization order can be important. For example, when a device relies on +an I2C-based GPIO, its probe() routine should only be called after that GPIO +becomes available. That may mean the device should not be registered until +calls for that GPIO can work. One way to address such dependencies is for +such gpio_chip controllers to provide setup() and teardown() callbacks to +board specific code; those board specific callbacks would register devices +once all the necessary resources are available, and remove them later when +the GPIO controller device becomes unavailable. + + +Sysfs Interface for Userspace (OPTIONAL) +======================================== +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given appropriate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. + +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + + If the pin can be configured as interrupt-generating interrupt + and if it has been configured to generate interrupts (see the + description of "edge"), you can poll(2) on that file and + poll(2) will return whenever the interrupt was triggered. If + you use poll(2), set the events POLLPRI. If you use select(2), + set the file descriptor in exceptfds. After poll(2) returns, + either lseek(2) to the beginning of the sysfs file and read the + new value or close the file and re-open it to read the value. + + "edge" ... reads as either "none", "rising", "falling", or + "both". Write these strings to select the signal edge(s) + that will make poll(2) on the "value" file return. + + This file exists only if the pin can be configured as an + interrupt generating input pin. + + "active_low" ... reads as either 0 (false) or 1 (true). Write + any nonzero value to invert the value attribute both + for reading and writing. Existing and subsequent + poll(2) support configuration via the edge attribute + for "rising" and "falling" edges will follow this + setting. + +GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +API Reference +============= + +The functions listed in this section are deprecated. The GPIO descriptor based +API should be used in new code. + +.. kernel-doc:: drivers/gpio/gpiolib-legacy.c + :export: diff --git a/Documentation/driver-api/gpio/using-gpio.rst b/Documentation/driver-api/gpio/using-gpio.rst new file mode 100644 index 000000000..894d88855 --- /dev/null +++ b/Documentation/driver-api/gpio/using-gpio.rst @@ -0,0 +1,50 @@ +========================= +Using GPIO Lines in Linux +========================= + +The Linux kernel exists to abstract and present hardware to users. GPIO lines +as such are normally not user facing abstractions. The most obvious, natural +and preferred way to use GPIO lines is to let kernel hardware drivers deal +with them. + +For examples of already existing generic drivers that will also be good +examples for any other kernel drivers you want to author, refer to +Documentation/driver-api/gpio/drivers-on-gpio.rst + +For any kind of mass produced system you want to support, such as servers, +laptops, phones, tablets, routers, and any consumer or office or business goods +using appropriate kernel drivers is paramount. Submit your code for inclusion +in the upstream Linux kernel when you feel it is mature enough and you will get +help to refine it, see Documentation/process/submitting-patches.rst. + +In Linux GPIO lines also have a userspace ABI. + +The userspace ABI is intended for one-off deployments. Examples are prototypes, +factory lines, maker community projects, workshop specimen, production tools, +industrial automation, PLC-type use cases, door controllers, in short a piece +of specialized equipment that is not produced by the numbers, requiring +operators to have a deep knowledge of the equipment and knows about the +software-hardware interface to be set up. They should not have a natural fit +to any existing kernel subsystem and not be a good fit for an operating system, +because of not being reusable or abstract enough, or involving a lot of non +computer hardware related policy. + +Applications that have a good reason to use the industrial I/O (IIO) subsystem +from userspace will likely be a good fit for using GPIO lines from userspace as +well. + +Do not under any circumstances abuse the GPIO userspace ABI to cut corners in +any product development projects. If you use it for prototyping, then do not +productify the prototype: rewrite it using proper kernel drivers. Do not under +any circumstances deploy any uniform products using GPIO from userspace. + +The userspace ABI is a character device for each GPIO hardware unit (GPIO chip). +These devices will appear on the system as ``/dev/gpiochip0`` thru +``/dev/gpiochipN``. Examples of how to directly use the userspace ABI can be +found in the kernel tree ``tools/gpio`` subdirectory. + +For structured and managed applications, we recommend that you make use of the +libgpiod_ library. This provides helper abstractions, command line utilities +and arbitration for multiple simultaneous consumers on the same GPIO chip. + +.. _libgpiod: https://git.kernel.org/pub/scm/libs/libgpiod/libgpiod.git/ diff --git a/Documentation/driver-api/hsi.rst b/Documentation/driver-api/hsi.rst new file mode 100644 index 000000000..01b6bebfb --- /dev/null +++ b/Documentation/driver-api/hsi.rst @@ -0,0 +1,88 @@ +High Speed Synchronous Serial Interface (HSI) +============================================= + +Introduction +--------------- + +High Speed Synchronous Interface (HSI) is a full duplex, low latency protocol, +that is optimized for die-level interconnect between an Application Processor +and a Baseband chipset. It has been specified by the MIPI alliance in 2003 and +implemented by multiple vendors since then. + +The HSI interface supports full duplex communication over multiple channels +(typically 8) and is capable of reaching speeds up to 200 Mbit/s. + +The serial protocol uses two signals, DATA and FLAG as combined data and clock +signals and an additional READY signal for flow control. An additional WAKE +signal can be used to wakeup the chips from standby modes. The signals are +commonly prefixed by AC for signals going from the application die to the +cellular die and CA for signals going the other way around. + +:: + + +------------+ +---------------+ + | Cellular | | Application | + | Die | | Die | + | | - - - - - - CAWAKE - - - - - - >| | + | T|------------ CADATA ------------>|R | + | X|------------ CAFLAG ------------>|X | + | |<----------- ACREADY ------------| | + | | | | + | | | | + | |< - - - - - ACWAKE - - - - - - -| | + | R|<----------- ACDATA -------------|T | + | X|<----------- ACFLAG -------------|X | + | |------------ CAREADY ----------->| | + | | | | + | | | | + +------------+ +---------------+ + +HSI Subsystem in Linux +------------------------- + +In the Linux kernel the hsi subsystem is supposed to be used for HSI devices. +The hsi subsystem contains drivers for hsi controllers including support for +multi-port controllers and provides a generic API for using the HSI ports. + +It also contains HSI client drivers, which make use of the generic API to +implement a protocol used on the HSI interface. These client drivers can +use an arbitrary number of channels. + +hsi-char Device +------------------ + +Each port automatically registers a generic client driver called hsi_char, +which provides a character device for userspace representing the HSI port. +It can be used to communicate via HSI from userspace. Userspace may +configure the hsi_char device using the following ioctl commands: + +HSC_RESET + flush the HSI port + +HSC_SET_PM + enable or disable the client. + +HSC_SEND_BREAK + send break + +HSC_SET_RX + set RX configuration + +HSC_GET_RX + get RX configuration + +HSC_SET_TX + set TX configuration + +HSC_GET_TX + get TX configuration + +The kernel HSI API +------------------ + +.. kernel-doc:: include/linux/hsi/hsi.h + :internal: + +.. kernel-doc:: drivers/hsi/hsi_core.c + :export: + diff --git a/Documentation/driver-api/hte/hte.rst b/Documentation/driver-api/hte/hte.rst new file mode 100644 index 000000000..153f3233c --- /dev/null +++ b/Documentation/driver-api/hte/hte.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +============================================ +The Linux Hardware Timestamping Engine (HTE) +============================================ + +:Author: Dipen Patel + +Introduction +------------ + +Certain devices have built in hardware timestamping engines which can +monitor sets of system signals, lines, buses etc... in realtime for state +change; upon detecting the change they can automatically store the timestamp at +the moment of occurrence. Such functionality may help achieve better accuracy +in obtaining timestamps than using software counterparts i.e. ktime and +friends. + +This document describes the API that can be used by hardware timestamping +engine provider and consumer drivers that want to use the hardware timestamping +engine (HTE) framework. Both consumers and providers must include +``#include ``. + +The HTE framework APIs for the providers +---------------------------------------- + +.. kernel-doc:: drivers/hte/hte.c + :functions: devm_hte_register_chip hte_push_ts_ns + +The HTE framework APIs for the consumers +---------------------------------------- + +.. kernel-doc:: drivers/hte/hte.c + :functions: hte_init_line_attr hte_ts_get hte_ts_put devm_hte_request_ts_ns hte_request_ts_ns hte_enable_ts hte_disable_ts of_hte_req_count hte_get_clk_src_info + +The HTE framework public structures +----------------------------------- +.. kernel-doc:: include/linux/hte.h + +More on the HTE timestamp data +------------------------------ +The ``struct hte_ts_data`` is used to pass timestamp details between the +consumers and the providers. It expresses timestamp data in nanoseconds in +u64. An example of the typical timestamp data life cycle, for the GPIO line is +as follows:: + + - Monitors GPIO line change. + - Detects the state change on GPIO line. + - Converts timestamps in nanoseconds. + - Stores GPIO raw level in raw_level variable if the provider has that + hardware capability. + - Pushes this hte_ts_data object to HTE subsystem. + - HTE subsystem increments seq counter and invokes consumer provided callback. + Based on callback return value, the HTE core invokes secondary callback in + the thread context. + +HTE subsystem debugfs attributes +-------------------------------- +HTE subsystem creates debugfs attributes at ``/sys/kernel/debug/hte/``. +It also creates line/signal-related debugfs attributes at +``/sys/kernel/debug/hte//